res_cache.c revision 33a73bfa0712c483830e4c9abbf4d6a0551478bf
1/*
2 * Copyright (C) 2008 The Android Open Source Project
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 *  * Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 *  * Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in
12 *    the documentation and/or other materials provided with the
13 *    distribution.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
16 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
17 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
18 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
19 * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
20 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
21 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
22 * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
23 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
24 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
25 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26 * SUCH DAMAGE.
27 */
28
29#include "resolv_cache.h"
30#include <resolv.h>
31#include <stdlib.h>
32#include <string.h>
33#include <time.h>
34#include "pthread.h"
35
36#include <errno.h>
37#include <arpa/nameser.h>
38#include <sys/system_properties.h>
39#include <net/if.h>
40#include <netdb.h>
41#include <linux/if.h>
42
43#include <arpa/inet.h>
44#include "resolv_private.h"
45#include "resolv_netid.h"
46#include "res_private.h"
47
48/* This code implements a small and *simple* DNS resolver cache.
49 *
50 * It is only used to cache DNS answers for a time defined by the smallest TTL
51 * among the answer records in order to reduce DNS traffic. It is not supposed
52 * to be a full DNS cache, since we plan to implement that in the future in a
53 * dedicated process running on the system.
54 *
55 * Note that its design is kept simple very intentionally, i.e.:
56 *
57 *  - it takes raw DNS query packet data as input, and returns raw DNS
58 *    answer packet data as output
59 *
60 *    (this means that two similar queries that encode the DNS name
61 *     differently will be treated distinctly).
62 *
63 *    the smallest TTL value among the answer records are used as the time
64 *    to keep an answer in the cache.
65 *
66 *    this is bad, but we absolutely want to avoid parsing the answer packets
67 *    (and should be solved by the later full DNS cache process).
68 *
69 *  - the implementation is just a (query-data) => (answer-data) hash table
70 *    with a trivial least-recently-used expiration policy.
71 *
72 * Doing this keeps the code simple and avoids to deal with a lot of things
73 * that a full DNS cache is expected to do.
74 *
75 * The API is also very simple:
76 *
77 *   - the client calls _resolv_cache_get() to obtain a handle to the cache.
78 *     this will initialize the cache on first usage. the result can be NULL
79 *     if the cache is disabled.
80 *
81 *   - the client calls _resolv_cache_lookup() before performing a query
82 *
83 *     if the function returns RESOLV_CACHE_FOUND, a copy of the answer data
84 *     has been copied into the client-provided answer buffer.
85 *
86 *     if the function returns RESOLV_CACHE_NOTFOUND, the client should perform
87 *     a request normally, *then* call _resolv_cache_add() to add the received
88 *     answer to the cache.
89 *
90 *     if the function returns RESOLV_CACHE_UNSUPPORTED, the client should
91 *     perform a request normally, and *not* call _resolv_cache_add()
92 *
93 *     note that RESOLV_CACHE_UNSUPPORTED is also returned if the answer buffer
94 *     is too short to accomodate the cached result.
95 */
96
97/* the name of an environment variable that will be checked the first time
98 * this code is called if its value is "0", then the resolver cache is
99 * disabled.
100 */
101#define  CONFIG_ENV  "BIONIC_DNSCACHE"
102
103/* entries older than CONFIG_SECONDS seconds are always discarded.
104 */
105#define  CONFIG_SECONDS    (60*10)    /* 10 minutes */
106
107/* default number of entries kept in the cache. This value has been
108 * determined by browsing through various sites and counting the number
109 * of corresponding requests. Keep in mind that our framework is currently
110 * performing two requests per name lookup (one for IPv4, the other for IPv6)
111 *
112 *    www.google.com      4
113 *    www.ysearch.com     6
114 *    www.amazon.com      8
115 *    www.nytimes.com     22
116 *    www.espn.com        28
117 *    www.msn.com         28
118 *    www.lemonde.fr      35
119 *
120 * (determined in 2009-2-17 from Paris, France, results may vary depending
121 *  on location)
122 *
123 * most high-level websites use lots of media/ad servers with different names
124 * but these are generally reused when browsing through the site.
125 *
126 * As such, a value of 64 should be relatively comfortable at the moment.
127 *
128 * ******************************************
129 * * NOTE - this has changed.
130 * * 1) we've added IPv6 support so each dns query results in 2 responses
131 * * 2) we've made this a system-wide cache, so the cost is less (it's not
132 * *    duplicated in each process) and the need is greater (more processes
133 * *    making different requests).
134 * * Upping by 2x for IPv6
135 * * Upping by another 5x for the centralized nature
136 * *****************************************
137 */
138#define  CONFIG_MAX_ENTRIES    64 * 2 * 5
139/* name of the system property that can be used to set the cache size */
140
141/****************************************************************************/
142/****************************************************************************/
143/*****                                                                  *****/
144/*****                                                                  *****/
145/*****                                                                  *****/
146/****************************************************************************/
147/****************************************************************************/
148
149/* set to 1 to debug cache operations */
150#define  DEBUG       0
151
152/* set to 1 to debug query data */
153#define  DEBUG_DATA  0
154
155#undef XLOG
156#if DEBUG
157#  include "private/libc_logging.h"
158#  define XLOG(...)  __libc_format_log(ANDROID_LOG_DEBUG,"libc",__VA_ARGS__)
159
160#include <stdio.h>
161#include <stdarg.h>
162
163/** BOUNDED BUFFER FORMATTING
164 **/
165
166/* technical note:
167 *
168 *   the following debugging routines are used to append data to a bounded
169 *   buffer they take two parameters that are:
170 *
171 *   - p : a pointer to the current cursor position in the buffer
172 *         this value is initially set to the buffer's address.
173 *
174 *   - end : the address of the buffer's limit, i.e. of the first byte
175 *           after the buffer. this address should never be touched.
176 *
177 *           IMPORTANT: it is assumed that end > buffer_address, i.e.
178 *                      that the buffer is at least one byte.
179 *
180 *   the _bprint_() functions return the new value of 'p' after the data
181 *   has been appended, and also ensure the following:
182 *
183 *   - the returned value will never be strictly greater than 'end'
184 *
185 *   - a return value equal to 'end' means that truncation occured
186 *     (in which case, end[-1] will be set to 0)
187 *
188 *   - after returning from a _bprint_() function, the content of the buffer
189 *     is always 0-terminated, even in the event of truncation.
190 *
191 *  these conventions allow you to call _bprint_ functions multiple times and
192 *  only check for truncation at the end of the sequence, as in:
193 *
194 *     char  buff[1000], *p = buff, *end = p + sizeof(buff);
195 *
196 *     p = _bprint_c(p, end, '"');
197 *     p = _bprint_s(p, end, my_string);
198 *     p = _bprint_c(p, end, '"');
199 *
200 *     if (p >= end) {
201 *        // buffer was too small
202 *     }
203 *
204 *     printf( "%s", buff );
205 */
206
207/* add a char to a bounded buffer */
208static char*
209_bprint_c( char*  p, char*  end, int  c )
210{
211    if (p < end) {
212        if (p+1 == end)
213            *p++ = 0;
214        else {
215            *p++ = (char) c;
216            *p   = 0;
217        }
218    }
219    return p;
220}
221
222/* add a sequence of bytes to a bounded buffer */
223static char*
224_bprint_b( char*  p, char*  end, const char*  buf, int  len )
225{
226    int  avail = end - p;
227
228    if (avail <= 0 || len <= 0)
229        return p;
230
231    if (avail > len)
232        avail = len;
233
234    memcpy( p, buf, avail );
235    p += avail;
236
237    if (p < end)
238        p[0] = 0;
239    else
240        end[-1] = 0;
241
242    return p;
243}
244
245/* add a string to a bounded buffer */
246static char*
247_bprint_s( char*  p, char*  end, const char*  str )
248{
249    return _bprint_b(p, end, str, strlen(str));
250}
251
252/* add a formatted string to a bounded buffer */
253static char*
254_bprint( char*  p, char*  end, const char*  format, ... )
255{
256    int      avail, n;
257    va_list  args;
258
259    avail = end - p;
260
261    if (avail <= 0)
262        return p;
263
264    va_start(args, format);
265    n = vsnprintf( p, avail, format, args);
266    va_end(args);
267
268    /* certain C libraries return -1 in case of truncation */
269    if (n < 0 || n > avail)
270        n = avail;
271
272    p += n;
273    /* certain C libraries do not zero-terminate in case of truncation */
274    if (p == end)
275        p[-1] = 0;
276
277    return p;
278}
279
280/* add a hex value to a bounded buffer, up to 8 digits */
281static char*
282_bprint_hex( char*  p, char*  end, unsigned  value, int  numDigits )
283{
284    char   text[sizeof(unsigned)*2];
285    int    nn = 0;
286
287    while (numDigits-- > 0) {
288        text[nn++] = "0123456789abcdef"[(value >> (numDigits*4)) & 15];
289    }
290    return _bprint_b(p, end, text, nn);
291}
292
293/* add the hexadecimal dump of some memory area to a bounded buffer */
294static char*
295_bprint_hexdump( char*  p, char*  end, const uint8_t*  data, int  datalen )
296{
297    int   lineSize = 16;
298
299    while (datalen > 0) {
300        int  avail = datalen;
301        int  nn;
302
303        if (avail > lineSize)
304            avail = lineSize;
305
306        for (nn = 0; nn < avail; nn++) {
307            if (nn > 0)
308                p = _bprint_c(p, end, ' ');
309            p = _bprint_hex(p, end, data[nn], 2);
310        }
311        for ( ; nn < lineSize; nn++ ) {
312            p = _bprint_s(p, end, "   ");
313        }
314        p = _bprint_s(p, end, "  ");
315
316        for (nn = 0; nn < avail; nn++) {
317            int  c = data[nn];
318
319            if (c < 32 || c > 127)
320                c = '.';
321
322            p = _bprint_c(p, end, c);
323        }
324        p = _bprint_c(p, end, '\n');
325
326        data    += avail;
327        datalen -= avail;
328    }
329    return p;
330}
331
332/* dump the content of a query of packet to the log */
333static void
334XLOG_BYTES( const void*  base, int  len )
335{
336    char  buff[1024];
337    char*  p = buff, *end = p + sizeof(buff);
338
339    p = _bprint_hexdump(p, end, base, len);
340    XLOG("%s",buff);
341}
342
343#else /* !DEBUG */
344#  define  XLOG(...)        ((void)0)
345#  define  XLOG_BYTES(a,b)  ((void)0)
346#endif
347
348static time_t
349_time_now( void )
350{
351    struct timeval  tv;
352
353    gettimeofday( &tv, NULL );
354    return tv.tv_sec;
355}
356
357/* reminder: the general format of a DNS packet is the following:
358 *
359 *    HEADER  (12 bytes)
360 *    QUESTION  (variable)
361 *    ANSWER (variable)
362 *    AUTHORITY (variable)
363 *    ADDITIONNAL (variable)
364 *
365 * the HEADER is made of:
366 *
367 *   ID     : 16 : 16-bit unique query identification field
368 *
369 *   QR     :  1 : set to 0 for queries, and 1 for responses
370 *   Opcode :  4 : set to 0 for queries
371 *   AA     :  1 : set to 0 for queries
372 *   TC     :  1 : truncation flag, will be set to 0 in queries
373 *   RD     :  1 : recursion desired
374 *
375 *   RA     :  1 : recursion available (0 in queries)
376 *   Z      :  3 : three reserved zero bits
377 *   RCODE  :  4 : response code (always 0=NOERROR in queries)
378 *
379 *   QDCount: 16 : question count
380 *   ANCount: 16 : Answer count (0 in queries)
381 *   NSCount: 16: Authority Record count (0 in queries)
382 *   ARCount: 16: Additionnal Record count (0 in queries)
383 *
384 * the QUESTION is made of QDCount Question Record (QRs)
385 * the ANSWER is made of ANCount RRs
386 * the AUTHORITY is made of NSCount RRs
387 * the ADDITIONNAL is made of ARCount RRs
388 *
389 * Each Question Record (QR) is made of:
390 *
391 *   QNAME   : variable : Query DNS NAME
392 *   TYPE    : 16       : type of query (A=1, PTR=12, MX=15, AAAA=28, ALL=255)
393 *   CLASS   : 16       : class of query (IN=1)
394 *
395 * Each Resource Record (RR) is made of:
396 *
397 *   NAME    : variable : DNS NAME
398 *   TYPE    : 16       : type of query (A=1, PTR=12, MX=15, AAAA=28, ALL=255)
399 *   CLASS   : 16       : class of query (IN=1)
400 *   TTL     : 32       : seconds to cache this RR (0=none)
401 *   RDLENGTH: 16       : size of RDDATA in bytes
402 *   RDDATA  : variable : RR data (depends on TYPE)
403 *
404 * Each QNAME contains a domain name encoded as a sequence of 'labels'
405 * terminated by a zero. Each label has the following format:
406 *
407 *    LEN  : 8     : lenght of label (MUST be < 64)
408 *    NAME : 8*LEN : label length (must exclude dots)
409 *
410 * A value of 0 in the encoding is interpreted as the 'root' domain and
411 * terminates the encoding. So 'www.android.com' will be encoded as:
412 *
413 *   <3>www<7>android<3>com<0>
414 *
415 * Where <n> represents the byte with value 'n'
416 *
417 * Each NAME reflects the QNAME of the question, but has a slightly more
418 * complex encoding in order to provide message compression. This is achieved
419 * by using a 2-byte pointer, with format:
420 *
421 *    TYPE   : 2  : 0b11 to indicate a pointer, 0b01 and 0b10 are reserved
422 *    OFFSET : 14 : offset to another part of the DNS packet
423 *
424 * The offset is relative to the start of the DNS packet and must point
425 * A pointer terminates the encoding.
426 *
427 * The NAME can be encoded in one of the following formats:
428 *
429 *   - a sequence of simple labels terminated by 0 (like QNAMEs)
430 *   - a single pointer
431 *   - a sequence of simple labels terminated by a pointer
432 *
433 * A pointer shall always point to either a pointer of a sequence of
434 * labels (which can themselves be terminated by either a 0 or a pointer)
435 *
436 * The expanded length of a given domain name should not exceed 255 bytes.
437 *
438 * NOTE: we don't parse the answer packets, so don't need to deal with NAME
439 *       records, only QNAMEs.
440 */
441
442#define  DNS_HEADER_SIZE  12
443
444#define  DNS_TYPE_A   "\00\01"   /* big-endian decimal 1 */
445#define  DNS_TYPE_PTR "\00\014"  /* big-endian decimal 12 */
446#define  DNS_TYPE_MX  "\00\017"  /* big-endian decimal 15 */
447#define  DNS_TYPE_AAAA "\00\034" /* big-endian decimal 28 */
448#define  DNS_TYPE_ALL "\00\0377" /* big-endian decimal 255 */
449
450#define  DNS_CLASS_IN "\00\01"   /* big-endian decimal 1 */
451
452typedef struct {
453    const uint8_t*  base;
454    const uint8_t*  end;
455    const uint8_t*  cursor;
456} DnsPacket;
457
458static void
459_dnsPacket_init( DnsPacket*  packet, const uint8_t*  buff, int  bufflen )
460{
461    packet->base   = buff;
462    packet->end    = buff + bufflen;
463    packet->cursor = buff;
464}
465
466static void
467_dnsPacket_rewind( DnsPacket*  packet )
468{
469    packet->cursor = packet->base;
470}
471
472static void
473_dnsPacket_skip( DnsPacket*  packet, int  count )
474{
475    const uint8_t*  p = packet->cursor + count;
476
477    if (p > packet->end)
478        p = packet->end;
479
480    packet->cursor = p;
481}
482
483static int
484_dnsPacket_readInt16( DnsPacket*  packet )
485{
486    const uint8_t*  p = packet->cursor;
487
488    if (p+2 > packet->end)
489        return -1;
490
491    packet->cursor = p+2;
492    return (p[0]<< 8) | p[1];
493}
494
495/** QUERY CHECKING
496 **/
497
498/* check bytes in a dns packet. returns 1 on success, 0 on failure.
499 * the cursor is only advanced in the case of success
500 */
501static int
502_dnsPacket_checkBytes( DnsPacket*  packet, int  numBytes, const void*  bytes )
503{
504    const uint8_t*  p = packet->cursor;
505
506    if (p + numBytes > packet->end)
507        return 0;
508
509    if (memcmp(p, bytes, numBytes) != 0)
510        return 0;
511
512    packet->cursor = p + numBytes;
513    return 1;
514}
515
516/* parse and skip a given QNAME stored in a query packet,
517 * from the current cursor position. returns 1 on success,
518 * or 0 for malformed data.
519 */
520static int
521_dnsPacket_checkQName( DnsPacket*  packet )
522{
523    const uint8_t*  p   = packet->cursor;
524    const uint8_t*  end = packet->end;
525
526    for (;;) {
527        int  c;
528
529        if (p >= end)
530            break;
531
532        c = *p++;
533
534        if (c == 0) {
535            packet->cursor = p;
536            return 1;
537        }
538
539        /* we don't expect label compression in QNAMEs */
540        if (c >= 64)
541            break;
542
543        p += c;
544        /* we rely on the bound check at the start
545         * of the loop here */
546    }
547    /* malformed data */
548    XLOG("malformed QNAME");
549    return 0;
550}
551
552/* parse and skip a given QR stored in a packet.
553 * returns 1 on success, and 0 on failure
554 */
555static int
556_dnsPacket_checkQR( DnsPacket*  packet )
557{
558    if (!_dnsPacket_checkQName(packet))
559        return 0;
560
561    /* TYPE must be one of the things we support */
562    if (!_dnsPacket_checkBytes(packet, 2, DNS_TYPE_A) &&
563        !_dnsPacket_checkBytes(packet, 2, DNS_TYPE_PTR) &&
564        !_dnsPacket_checkBytes(packet, 2, DNS_TYPE_MX) &&
565        !_dnsPacket_checkBytes(packet, 2, DNS_TYPE_AAAA) &&
566        !_dnsPacket_checkBytes(packet, 2, DNS_TYPE_ALL))
567    {
568        XLOG("unsupported TYPE");
569        return 0;
570    }
571    /* CLASS must be IN */
572    if (!_dnsPacket_checkBytes(packet, 2, DNS_CLASS_IN)) {
573        XLOG("unsupported CLASS");
574        return 0;
575    }
576
577    return 1;
578}
579
580/* check the header of a DNS Query packet, return 1 if it is one
581 * type of query we can cache, or 0 otherwise
582 */
583static int
584_dnsPacket_checkQuery( DnsPacket*  packet )
585{
586    const uint8_t*  p = packet->base;
587    int             qdCount, anCount, dnCount, arCount;
588
589    if (p + DNS_HEADER_SIZE > packet->end) {
590        XLOG("query packet too small");
591        return 0;
592    }
593
594    /* QR must be set to 0, opcode must be 0 and AA must be 0 */
595    /* RA, Z, and RCODE must be 0 */
596    if ((p[2] & 0xFC) != 0 || p[3] != 0) {
597        XLOG("query packet flags unsupported");
598        return 0;
599    }
600
601    /* Note that we ignore the TC and RD bits here for the
602     * following reasons:
603     *
604     * - there is no point for a query packet sent to a server
605     *   to have the TC bit set, but the implementation might
606     *   set the bit in the query buffer for its own needs
607     *   between a _resolv_cache_lookup and a
608     *   _resolv_cache_add. We should not freak out if this
609     *   is the case.
610     *
611     * - we consider that the result from a RD=0 or a RD=1
612     *   query might be different, hence that the RD bit
613     *   should be used to differentiate cached result.
614     *
615     *   this implies that RD is checked when hashing or
616     *   comparing query packets, but not TC
617     */
618
619    /* ANCOUNT, DNCOUNT and ARCOUNT must be 0 */
620    qdCount = (p[4] << 8) | p[5];
621    anCount = (p[6] << 8) | p[7];
622    dnCount = (p[8] << 8) | p[9];
623    arCount = (p[10]<< 8) | p[11];
624
625    if (anCount != 0 || dnCount != 0 || arCount != 0) {
626        XLOG("query packet contains non-query records");
627        return 0;
628    }
629
630    if (qdCount == 0) {
631        XLOG("query packet doesn't contain query record");
632        return 0;
633    }
634
635    /* Check QDCOUNT QRs */
636    packet->cursor = p + DNS_HEADER_SIZE;
637
638    for (;qdCount > 0; qdCount--)
639        if (!_dnsPacket_checkQR(packet))
640            return 0;
641
642    return 1;
643}
644
645/** QUERY DEBUGGING
646 **/
647#if DEBUG
648static char*
649_dnsPacket_bprintQName(DnsPacket*  packet, char*  bp, char*  bend)
650{
651    const uint8_t*  p   = packet->cursor;
652    const uint8_t*  end = packet->end;
653    int             first = 1;
654
655    for (;;) {
656        int  c;
657
658        if (p >= end)
659            break;
660
661        c = *p++;
662
663        if (c == 0) {
664            packet->cursor = p;
665            return bp;
666        }
667
668        /* we don't expect label compression in QNAMEs */
669        if (c >= 64)
670            break;
671
672        if (first)
673            first = 0;
674        else
675            bp = _bprint_c(bp, bend, '.');
676
677        bp = _bprint_b(bp, bend, (const char*)p, c);
678
679        p += c;
680        /* we rely on the bound check at the start
681         * of the loop here */
682    }
683    /* malformed data */
684    bp = _bprint_s(bp, bend, "<MALFORMED>");
685    return bp;
686}
687
688static char*
689_dnsPacket_bprintQR(DnsPacket*  packet, char*  p, char*  end)
690{
691#define  QQ(x)   { DNS_TYPE_##x, #x }
692    static const struct {
693        const char*  typeBytes;
694        const char*  typeString;
695    } qTypes[] =
696    {
697        QQ(A), QQ(PTR), QQ(MX), QQ(AAAA), QQ(ALL),
698        { NULL, NULL }
699    };
700    int          nn;
701    const char*  typeString = NULL;
702
703    /* dump QNAME */
704    p = _dnsPacket_bprintQName(packet, p, end);
705
706    /* dump TYPE */
707    p = _bprint_s(p, end, " (");
708
709    for (nn = 0; qTypes[nn].typeBytes != NULL; nn++) {
710        if (_dnsPacket_checkBytes(packet, 2, qTypes[nn].typeBytes)) {
711            typeString = qTypes[nn].typeString;
712            break;
713        }
714    }
715
716    if (typeString != NULL)
717        p = _bprint_s(p, end, typeString);
718    else {
719        int  typeCode = _dnsPacket_readInt16(packet);
720        p = _bprint(p, end, "UNKNOWN-%d", typeCode);
721    }
722
723    p = _bprint_c(p, end, ')');
724
725    /* skip CLASS */
726    _dnsPacket_skip(packet, 2);
727    return p;
728}
729
730/* this function assumes the packet has already been checked */
731static char*
732_dnsPacket_bprintQuery( DnsPacket*  packet, char*  p, char*  end )
733{
734    int   qdCount;
735
736    if (packet->base[2] & 0x1) {
737        p = _bprint_s(p, end, "RECURSIVE ");
738    }
739
740    _dnsPacket_skip(packet, 4);
741    qdCount = _dnsPacket_readInt16(packet);
742    _dnsPacket_skip(packet, 6);
743
744    for ( ; qdCount > 0; qdCount-- ) {
745        p = _dnsPacket_bprintQR(packet, p, end);
746    }
747    return p;
748}
749#endif
750
751
752/** QUERY HASHING SUPPORT
753 **
754 ** THE FOLLOWING CODE ASSUMES THAT THE INPUT PACKET HAS ALREADY
755 ** BEEN SUCCESFULLY CHECKED.
756 **/
757
758/* use 32-bit FNV hash function */
759#define  FNV_MULT   16777619U
760#define  FNV_BASIS  2166136261U
761
762static unsigned
763_dnsPacket_hashBytes( DnsPacket*  packet, int  numBytes, unsigned  hash )
764{
765    const uint8_t*  p   = packet->cursor;
766    const uint8_t*  end = packet->end;
767
768    while (numBytes > 0 && p < end) {
769        hash = hash*FNV_MULT ^ *p++;
770    }
771    packet->cursor = p;
772    return hash;
773}
774
775
776static unsigned
777_dnsPacket_hashQName( DnsPacket*  packet, unsigned  hash )
778{
779    const uint8_t*  p   = packet->cursor;
780    const uint8_t*  end = packet->end;
781
782    for (;;) {
783        int  c;
784
785        if (p >= end) {  /* should not happen */
786            XLOG("%s: INTERNAL_ERROR: read-overflow !!\n", __FUNCTION__);
787            break;
788        }
789
790        c = *p++;
791
792        if (c == 0)
793            break;
794
795        if (c >= 64) {
796            XLOG("%s: INTERNAL_ERROR: malformed domain !!\n", __FUNCTION__);
797            break;
798        }
799        if (p + c >= end) {
800            XLOG("%s: INTERNAL_ERROR: simple label read-overflow !!\n",
801                    __FUNCTION__);
802            break;
803        }
804        while (c > 0) {
805            hash = hash*FNV_MULT ^ *p++;
806            c   -= 1;
807        }
808    }
809    packet->cursor = p;
810    return hash;
811}
812
813static unsigned
814_dnsPacket_hashQR( DnsPacket*  packet, unsigned  hash )
815{
816    hash = _dnsPacket_hashQName(packet, hash);
817    hash = _dnsPacket_hashBytes(packet, 4, hash); /* TYPE and CLASS */
818    return hash;
819}
820
821static unsigned
822_dnsPacket_hashQuery( DnsPacket*  packet )
823{
824    unsigned  hash = FNV_BASIS;
825    int       count;
826    _dnsPacket_rewind(packet);
827
828    /* we ignore the TC bit for reasons explained in
829     * _dnsPacket_checkQuery().
830     *
831     * however we hash the RD bit to differentiate
832     * between answers for recursive and non-recursive
833     * queries.
834     */
835    hash = hash*FNV_MULT ^ (packet->base[2] & 1);
836
837    /* assume: other flags are 0 */
838    _dnsPacket_skip(packet, 4);
839
840    /* read QDCOUNT */
841    count = _dnsPacket_readInt16(packet);
842
843    /* assume: ANcount, NScount, ARcount are 0 */
844    _dnsPacket_skip(packet, 6);
845
846    /* hash QDCOUNT QRs */
847    for ( ; count > 0; count-- )
848        hash = _dnsPacket_hashQR(packet, hash);
849
850    return hash;
851}
852
853
854/** QUERY COMPARISON
855 **
856 ** THE FOLLOWING CODE ASSUMES THAT THE INPUT PACKETS HAVE ALREADY
857 ** BEEN SUCCESFULLY CHECKED.
858 **/
859
860static int
861_dnsPacket_isEqualDomainName( DnsPacket*  pack1, DnsPacket*  pack2 )
862{
863    const uint8_t*  p1   = pack1->cursor;
864    const uint8_t*  end1 = pack1->end;
865    const uint8_t*  p2   = pack2->cursor;
866    const uint8_t*  end2 = pack2->end;
867
868    for (;;) {
869        int  c1, c2;
870
871        if (p1 >= end1 || p2 >= end2) {
872            XLOG("%s: INTERNAL_ERROR: read-overflow !!\n", __FUNCTION__);
873            break;
874        }
875        c1 = *p1++;
876        c2 = *p2++;
877        if (c1 != c2)
878            break;
879
880        if (c1 == 0) {
881            pack1->cursor = p1;
882            pack2->cursor = p2;
883            return 1;
884        }
885        if (c1 >= 64) {
886            XLOG("%s: INTERNAL_ERROR: malformed domain !!\n", __FUNCTION__);
887            break;
888        }
889        if ((p1+c1 > end1) || (p2+c1 > end2)) {
890            XLOG("%s: INTERNAL_ERROR: simple label read-overflow !!\n",
891                    __FUNCTION__);
892            break;
893        }
894        if (memcmp(p1, p2, c1) != 0)
895            break;
896        p1 += c1;
897        p2 += c1;
898        /* we rely on the bound checks at the start of the loop */
899    }
900    /* not the same, or one is malformed */
901    XLOG("different DN");
902    return 0;
903}
904
905static int
906_dnsPacket_isEqualBytes( DnsPacket*  pack1, DnsPacket*  pack2, int  numBytes )
907{
908    const uint8_t*  p1 = pack1->cursor;
909    const uint8_t*  p2 = pack2->cursor;
910
911    if ( p1 + numBytes > pack1->end || p2 + numBytes > pack2->end )
912        return 0;
913
914    if ( memcmp(p1, p2, numBytes) != 0 )
915        return 0;
916
917    pack1->cursor += numBytes;
918    pack2->cursor += numBytes;
919    return 1;
920}
921
922static int
923_dnsPacket_isEqualQR( DnsPacket*  pack1, DnsPacket*  pack2 )
924{
925    /* compare domain name encoding + TYPE + CLASS */
926    if ( !_dnsPacket_isEqualDomainName(pack1, pack2) ||
927         !_dnsPacket_isEqualBytes(pack1, pack2, 2+2) )
928        return 0;
929
930    return 1;
931}
932
933static int
934_dnsPacket_isEqualQuery( DnsPacket*  pack1, DnsPacket*  pack2 )
935{
936    int  count1, count2;
937
938    /* compare the headers, ignore most fields */
939    _dnsPacket_rewind(pack1);
940    _dnsPacket_rewind(pack2);
941
942    /* compare RD, ignore TC, see comment in _dnsPacket_checkQuery */
943    if ((pack1->base[2] & 1) != (pack2->base[2] & 1)) {
944        XLOG("different RD");
945        return 0;
946    }
947
948    /* assume: other flags are all 0 */
949    _dnsPacket_skip(pack1, 4);
950    _dnsPacket_skip(pack2, 4);
951
952    /* compare QDCOUNT */
953    count1 = _dnsPacket_readInt16(pack1);
954    count2 = _dnsPacket_readInt16(pack2);
955    if (count1 != count2 || count1 < 0) {
956        XLOG("different QDCOUNT");
957        return 0;
958    }
959
960    /* assume: ANcount, NScount and ARcount are all 0 */
961    _dnsPacket_skip(pack1, 6);
962    _dnsPacket_skip(pack2, 6);
963
964    /* compare the QDCOUNT QRs */
965    for ( ; count1 > 0; count1-- ) {
966        if (!_dnsPacket_isEqualQR(pack1, pack2)) {
967            XLOG("different QR");
968            return 0;
969        }
970    }
971    return 1;
972}
973
974/****************************************************************************/
975/****************************************************************************/
976/*****                                                                  *****/
977/*****                                                                  *****/
978/*****                                                                  *****/
979/****************************************************************************/
980/****************************************************************************/
981
982/* cache entry. for simplicity, 'hash' and 'hlink' are inlined in this
983 * structure though they are conceptually part of the hash table.
984 *
985 * similarly, mru_next and mru_prev are part of the global MRU list
986 */
987typedef struct Entry {
988    unsigned int     hash;   /* hash value */
989    struct Entry*    hlink;  /* next in collision chain */
990    struct Entry*    mru_prev;
991    struct Entry*    mru_next;
992
993    const uint8_t*   query;
994    int              querylen;
995    const uint8_t*   answer;
996    int              answerlen;
997    time_t           expires;   /* time_t when the entry isn't valid any more */
998    int              id;        /* for debugging purpose */
999} Entry;
1000
1001/**
1002 * Find the TTL for a negative DNS result.  This is defined as the minimum
1003 * of the SOA records TTL and the MINIMUM-TTL field (RFC-2308).
1004 *
1005 * Return 0 if not found.
1006 */
1007static u_long
1008answer_getNegativeTTL(ns_msg handle) {
1009    int n, nscount;
1010    u_long result = 0;
1011    ns_rr rr;
1012
1013    nscount = ns_msg_count(handle, ns_s_ns);
1014    for (n = 0; n < nscount; n++) {
1015        if ((ns_parserr(&handle, ns_s_ns, n, &rr) == 0) && (ns_rr_type(rr) == ns_t_soa)) {
1016            const u_char *rdata = ns_rr_rdata(rr); // find the data
1017            const u_char *edata = rdata + ns_rr_rdlen(rr); // add the len to find the end
1018            int len;
1019            u_long ttl, rec_result = ns_rr_ttl(rr);
1020
1021            // find the MINIMUM-TTL field from the blob of binary data for this record
1022            // skip the server name
1023            len = dn_skipname(rdata, edata);
1024            if (len == -1) continue; // error skipping
1025            rdata += len;
1026
1027            // skip the admin name
1028            len = dn_skipname(rdata, edata);
1029            if (len == -1) continue; // error skipping
1030            rdata += len;
1031
1032            if (edata - rdata != 5*NS_INT32SZ) continue;
1033            // skip: serial number + refresh interval + retry interval + expiry
1034            rdata += NS_INT32SZ * 4;
1035            // finally read the MINIMUM TTL
1036            ttl = ns_get32(rdata);
1037            if (ttl < rec_result) {
1038                rec_result = ttl;
1039            }
1040            // Now that the record is read successfully, apply the new min TTL
1041            if (n == 0 || rec_result < result) {
1042                result = rec_result;
1043            }
1044        }
1045    }
1046    return result;
1047}
1048
1049/**
1050 * Parse the answer records and find the appropriate
1051 * smallest TTL among the records.  This might be from
1052 * the answer records if found or from the SOA record
1053 * if it's a negative result.
1054 *
1055 * The returned TTL is the number of seconds to
1056 * keep the answer in the cache.
1057 *
1058 * In case of parse error zero (0) is returned which
1059 * indicates that the answer shall not be cached.
1060 */
1061static u_long
1062answer_getTTL(const void* answer, int answerlen)
1063{
1064    ns_msg handle;
1065    int ancount, n;
1066    u_long result, ttl;
1067    ns_rr rr;
1068
1069    result = 0;
1070    if (ns_initparse(answer, answerlen, &handle) >= 0) {
1071        // get number of answer records
1072        ancount = ns_msg_count(handle, ns_s_an);
1073
1074        if (ancount == 0) {
1075            // a response with no answers?  Cache this negative result.
1076            result = answer_getNegativeTTL(handle);
1077        } else {
1078            for (n = 0; n < ancount; n++) {
1079                if (ns_parserr(&handle, ns_s_an, n, &rr) == 0) {
1080                    ttl = ns_rr_ttl(rr);
1081                    if (n == 0 || ttl < result) {
1082                        result = ttl;
1083                    }
1084                } else {
1085                    XLOG("ns_parserr failed ancount no = %d. errno = %s\n", n, strerror(errno));
1086                }
1087            }
1088        }
1089    } else {
1090        XLOG("ns_parserr failed. %s\n", strerror(errno));
1091    }
1092
1093    XLOG("TTL = %d\n", result);
1094
1095    return result;
1096}
1097
1098static void
1099entry_free( Entry*  e )
1100{
1101    /* everything is allocated in a single memory block */
1102    if (e) {
1103        free(e);
1104    }
1105}
1106
1107static __inline__ void
1108entry_mru_remove( Entry*  e )
1109{
1110    e->mru_prev->mru_next = e->mru_next;
1111    e->mru_next->mru_prev = e->mru_prev;
1112}
1113
1114static __inline__ void
1115entry_mru_add( Entry*  e, Entry*  list )
1116{
1117    Entry*  first = list->mru_next;
1118
1119    e->mru_next = first;
1120    e->mru_prev = list;
1121
1122    list->mru_next  = e;
1123    first->mru_prev = e;
1124}
1125
1126/* compute the hash of a given entry, this is a hash of most
1127 * data in the query (key) */
1128static unsigned
1129entry_hash( const Entry*  e )
1130{
1131    DnsPacket  pack[1];
1132
1133    _dnsPacket_init(pack, e->query, e->querylen);
1134    return _dnsPacket_hashQuery(pack);
1135}
1136
1137/* initialize an Entry as a search key, this also checks the input query packet
1138 * returns 1 on success, or 0 in case of unsupported/malformed data */
1139static int
1140entry_init_key( Entry*  e, const void*  query, int  querylen )
1141{
1142    DnsPacket  pack[1];
1143
1144    memset(e, 0, sizeof(*e));
1145
1146    e->query    = query;
1147    e->querylen = querylen;
1148    e->hash     = entry_hash(e);
1149
1150    _dnsPacket_init(pack, query, querylen);
1151
1152    return _dnsPacket_checkQuery(pack);
1153}
1154
1155/* allocate a new entry as a cache node */
1156static Entry*
1157entry_alloc( const Entry*  init, const void*  answer, int  answerlen )
1158{
1159    Entry*  e;
1160    int     size;
1161
1162    size = sizeof(*e) + init->querylen + answerlen;
1163    e    = calloc(size, 1);
1164    if (e == NULL)
1165        return e;
1166
1167    e->hash     = init->hash;
1168    e->query    = (const uint8_t*)(e+1);
1169    e->querylen = init->querylen;
1170
1171    memcpy( (char*)e->query, init->query, e->querylen );
1172
1173    e->answer    = e->query + e->querylen;
1174    e->answerlen = answerlen;
1175
1176    memcpy( (char*)e->answer, answer, e->answerlen );
1177
1178    return e;
1179}
1180
1181static int
1182entry_equals( const Entry*  e1, const Entry*  e2 )
1183{
1184    DnsPacket  pack1[1], pack2[1];
1185
1186    if (e1->querylen != e2->querylen) {
1187        return 0;
1188    }
1189    _dnsPacket_init(pack1, e1->query, e1->querylen);
1190    _dnsPacket_init(pack2, e2->query, e2->querylen);
1191
1192    return _dnsPacket_isEqualQuery(pack1, pack2);
1193}
1194
1195/****************************************************************************/
1196/****************************************************************************/
1197/*****                                                                  *****/
1198/*****                                                                  *****/
1199/*****                                                                  *****/
1200/****************************************************************************/
1201/****************************************************************************/
1202
1203/* We use a simple hash table with external collision lists
1204 * for simplicity, the hash-table fields 'hash' and 'hlink' are
1205 * inlined in the Entry structure.
1206 */
1207
1208/* Maximum time for a thread to wait for an pending request */
1209#define PENDING_REQUEST_TIMEOUT 20;
1210
1211typedef struct pending_req_info {
1212    unsigned int                hash;
1213    pthread_cond_t              cond;
1214    struct pending_req_info*    next;
1215} PendingReqInfo;
1216
1217typedef struct resolv_cache {
1218    int              max_entries;
1219    int              num_entries;
1220    Entry            mru_list;
1221    int              last_id;
1222    Entry*           entries;
1223    PendingReqInfo   pending_requests;
1224} Cache;
1225
1226struct resolv_cache_info {
1227    unsigned                    netid;
1228    Cache*                      cache;
1229    struct resolv_cache_info*   next;
1230    char*                       nameservers[MAXNS +1];
1231    struct addrinfo*            nsaddrinfo[MAXNS + 1];
1232    char                        defdname[256];
1233    int                         dnsrch_offset[MAXDNSRCH+1];  // offsets into defdname
1234};
1235
1236#define  HTABLE_VALID(x)  ((x) != NULL && (x) != HTABLE_DELETED)
1237
1238static pthread_once_t        _res_cache_once = PTHREAD_ONCE_INIT;
1239static void _res_cache_init(void);
1240
1241// lock protecting everything in the _resolve_cache_info structs (next ptr, etc)
1242static pthread_mutex_t _res_cache_list_lock;
1243
1244/* gets cache associated with a network, or NULL if none exists */
1245static struct resolv_cache* _find_named_cache_locked(unsigned netid);
1246
1247static void
1248_cache_flush_pending_requests_locked( struct resolv_cache* cache )
1249{
1250    struct pending_req_info *ri, *tmp;
1251    if (cache) {
1252        ri = cache->pending_requests.next;
1253
1254        while (ri) {
1255            tmp = ri;
1256            ri = ri->next;
1257            pthread_cond_broadcast(&tmp->cond);
1258
1259            pthread_cond_destroy(&tmp->cond);
1260            free(tmp);
1261        }
1262
1263        cache->pending_requests.next = NULL;
1264    }
1265}
1266
1267/* Return 0 if no pending request is found matching the key.
1268 * If a matching request is found the calling thread will wait until
1269 * the matching request completes, then update *cache and return 1. */
1270static int
1271_cache_check_pending_request_locked( struct resolv_cache** cache, Entry* key, unsigned netid )
1272{
1273    struct pending_req_info *ri, *prev;
1274    int exist = 0;
1275
1276    if (*cache && key) {
1277        ri = (*cache)->pending_requests.next;
1278        prev = &(*cache)->pending_requests;
1279        while (ri) {
1280            if (ri->hash == key->hash) {
1281                exist = 1;
1282                break;
1283            }
1284            prev = ri;
1285            ri = ri->next;
1286        }
1287
1288        if (!exist) {
1289            ri = calloc(1, sizeof(struct pending_req_info));
1290            if (ri) {
1291                ri->hash = key->hash;
1292                pthread_cond_init(&ri->cond, NULL);
1293                prev->next = ri;
1294            }
1295        } else {
1296            struct timespec ts = {0,0};
1297            XLOG("Waiting for previous request");
1298            ts.tv_sec = _time_now() + PENDING_REQUEST_TIMEOUT;
1299            pthread_cond_timedwait(&ri->cond, &_res_cache_list_lock, &ts);
1300            /* Must update *cache as it could have been deleted. */
1301            *cache = _find_named_cache_locked(netid);
1302        }
1303    }
1304
1305    return exist;
1306}
1307
1308/* notify any waiting thread that waiting on a request
1309 * matching the key has been added to the cache */
1310static void
1311_cache_notify_waiting_tid_locked( struct resolv_cache* cache, Entry* key )
1312{
1313    struct pending_req_info *ri, *prev;
1314
1315    if (cache && key) {
1316        ri = cache->pending_requests.next;
1317        prev = &cache->pending_requests;
1318        while (ri) {
1319            if (ri->hash == key->hash) {
1320                pthread_cond_broadcast(&ri->cond);
1321                break;
1322            }
1323            prev = ri;
1324            ri = ri->next;
1325        }
1326
1327        // remove item from list and destroy
1328        if (ri) {
1329            prev->next = ri->next;
1330            pthread_cond_destroy(&ri->cond);
1331            free(ri);
1332        }
1333    }
1334}
1335
1336/* notify the cache that the query failed */
1337void
1338_resolv_cache_query_failed( unsigned    netid,
1339                   const void* query,
1340                   int         querylen)
1341{
1342    Entry    key[1];
1343    Cache*   cache;
1344
1345    if (!entry_init_key(key, query, querylen))
1346        return;
1347
1348    pthread_mutex_lock(&_res_cache_list_lock);
1349
1350    cache = _find_named_cache_locked(netid);
1351
1352    if (cache) {
1353        _cache_notify_waiting_tid_locked(cache, key);
1354    }
1355
1356    pthread_mutex_unlock(&_res_cache_list_lock);
1357}
1358
1359static void
1360_cache_flush_locked( Cache*  cache )
1361{
1362    int     nn;
1363
1364    for (nn = 0; nn < cache->max_entries; nn++)
1365    {
1366        Entry**  pnode = (Entry**) &cache->entries[nn];
1367
1368        while (*pnode != NULL) {
1369            Entry*  node = *pnode;
1370            *pnode = node->hlink;
1371            entry_free(node);
1372        }
1373    }
1374
1375    // flush pending request
1376    _cache_flush_pending_requests_locked(cache);
1377
1378    cache->mru_list.mru_next = cache->mru_list.mru_prev = &cache->mru_list;
1379    cache->num_entries       = 0;
1380    cache->last_id           = 0;
1381
1382    XLOG("*************************\n"
1383         "*** DNS CACHE FLUSHED ***\n"
1384         "*************************");
1385}
1386
1387static int
1388_res_cache_get_max_entries( void )
1389{
1390    int cache_size = CONFIG_MAX_ENTRIES;
1391
1392    const char* cache_mode = getenv("ANDROID_DNS_MODE");
1393    if (cache_mode == NULL || strcmp(cache_mode, "local") != 0) {
1394        // Don't use the cache in local mode. This is used by the proxy itself.
1395        cache_size = 0;
1396    }
1397
1398    XLOG("cache size: %d", cache_size);
1399    return cache_size;
1400}
1401
1402static struct resolv_cache*
1403_resolv_cache_create( void )
1404{
1405    struct resolv_cache*  cache;
1406
1407    cache = calloc(sizeof(*cache), 1);
1408    if (cache) {
1409        cache->max_entries = _res_cache_get_max_entries();
1410        cache->entries = calloc(sizeof(*cache->entries), cache->max_entries);
1411        if (cache->entries) {
1412            cache->mru_list.mru_prev = cache->mru_list.mru_next = &cache->mru_list;
1413            XLOG("%s: cache created\n", __FUNCTION__);
1414        } else {
1415            free(cache);
1416            cache = NULL;
1417        }
1418    }
1419    return cache;
1420}
1421
1422
1423#if DEBUG
1424static void
1425_dump_query( const uint8_t*  query, int  querylen )
1426{
1427    char       temp[256], *p=temp, *end=p+sizeof(temp);
1428    DnsPacket  pack[1];
1429
1430    _dnsPacket_init(pack, query, querylen);
1431    p = _dnsPacket_bprintQuery(pack, p, end);
1432    XLOG("QUERY: %s", temp);
1433}
1434
1435static void
1436_cache_dump_mru( Cache*  cache )
1437{
1438    char    temp[512], *p=temp, *end=p+sizeof(temp);
1439    Entry*  e;
1440
1441    p = _bprint(temp, end, "MRU LIST (%2d): ", cache->num_entries);
1442    for (e = cache->mru_list.mru_next; e != &cache->mru_list; e = e->mru_next)
1443        p = _bprint(p, end, " %d", e->id);
1444
1445    XLOG("%s", temp);
1446}
1447
1448static void
1449_dump_answer(const void* answer, int answerlen)
1450{
1451    res_state statep;
1452    FILE* fp;
1453    char* buf;
1454    int fileLen;
1455
1456    fp = fopen("/data/reslog.txt", "w+");
1457    if (fp != NULL) {
1458        statep = __res_get_state();
1459
1460        res_pquery(statep, answer, answerlen, fp);
1461
1462        //Get file length
1463        fseek(fp, 0, SEEK_END);
1464        fileLen=ftell(fp);
1465        fseek(fp, 0, SEEK_SET);
1466        buf = (char *)malloc(fileLen+1);
1467        if (buf != NULL) {
1468            //Read file contents into buffer
1469            fread(buf, fileLen, 1, fp);
1470            XLOG("%s\n", buf);
1471            free(buf);
1472        }
1473        fclose(fp);
1474        remove("/data/reslog.txt");
1475    }
1476    else {
1477        errno = 0; // else debug is introducing error signals
1478        XLOG("%s: can't open file\n", __FUNCTION__);
1479    }
1480}
1481#endif
1482
1483#if DEBUG
1484#  define  XLOG_QUERY(q,len)   _dump_query((q), (len))
1485#  define  XLOG_ANSWER(a, len) _dump_answer((a), (len))
1486#else
1487#  define  XLOG_QUERY(q,len)   ((void)0)
1488#  define  XLOG_ANSWER(a,len)  ((void)0)
1489#endif
1490
1491/* This function tries to find a key within the hash table
1492 * In case of success, it will return a *pointer* to the hashed key.
1493 * In case of failure, it will return a *pointer* to NULL
1494 *
1495 * So, the caller must check '*result' to check for success/failure.
1496 *
1497 * The main idea is that the result can later be used directly in
1498 * calls to _resolv_cache_add or _resolv_cache_remove as the 'lookup'
1499 * parameter. This makes the code simpler and avoids re-searching
1500 * for the key position in the htable.
1501 *
1502 * The result of a lookup_p is only valid until you alter the hash
1503 * table.
1504 */
1505static Entry**
1506_cache_lookup_p( Cache*   cache,
1507                 Entry*   key )
1508{
1509    int      index = key->hash % cache->max_entries;
1510    Entry**  pnode = (Entry**) &cache->entries[ index ];
1511
1512    while (*pnode != NULL) {
1513        Entry*  node = *pnode;
1514
1515        if (node == NULL)
1516            break;
1517
1518        if (node->hash == key->hash && entry_equals(node, key))
1519            break;
1520
1521        pnode = &node->hlink;
1522    }
1523    return pnode;
1524}
1525
1526/* Add a new entry to the hash table. 'lookup' must be the
1527 * result of an immediate previous failed _lookup_p() call
1528 * (i.e. with *lookup == NULL), and 'e' is the pointer to the
1529 * newly created entry
1530 */
1531static void
1532_cache_add_p( Cache*   cache,
1533              Entry**  lookup,
1534              Entry*   e )
1535{
1536    *lookup = e;
1537    e->id = ++cache->last_id;
1538    entry_mru_add(e, &cache->mru_list);
1539    cache->num_entries += 1;
1540
1541    XLOG("%s: entry %d added (count=%d)", __FUNCTION__,
1542         e->id, cache->num_entries);
1543}
1544
1545/* Remove an existing entry from the hash table,
1546 * 'lookup' must be the result of an immediate previous
1547 * and succesful _lookup_p() call.
1548 */
1549static void
1550_cache_remove_p( Cache*   cache,
1551                 Entry**  lookup )
1552{
1553    Entry*  e  = *lookup;
1554
1555    XLOG("%s: entry %d removed (count=%d)", __FUNCTION__,
1556         e->id, cache->num_entries-1);
1557
1558    entry_mru_remove(e);
1559    *lookup = e->hlink;
1560    entry_free(e);
1561    cache->num_entries -= 1;
1562}
1563
1564/* Remove the oldest entry from the hash table.
1565 */
1566static void
1567_cache_remove_oldest( Cache*  cache )
1568{
1569    Entry*   oldest = cache->mru_list.mru_prev;
1570    Entry**  lookup = _cache_lookup_p(cache, oldest);
1571
1572    if (*lookup == NULL) { /* should not happen */
1573        XLOG("%s: OLDEST NOT IN HTABLE ?", __FUNCTION__);
1574        return;
1575    }
1576    if (DEBUG) {
1577        XLOG("Cache full - removing oldest");
1578        XLOG_QUERY(oldest->query, oldest->querylen);
1579    }
1580    _cache_remove_p(cache, lookup);
1581}
1582
1583/* Remove all expired entries from the hash table.
1584 */
1585static void _cache_remove_expired(Cache* cache) {
1586    Entry* e;
1587    time_t now = _time_now();
1588
1589    for (e = cache->mru_list.mru_next; e != &cache->mru_list;) {
1590        // Entry is old, remove
1591        if (now >= e->expires) {
1592            Entry** lookup = _cache_lookup_p(cache, e);
1593            if (*lookup == NULL) { /* should not happen */
1594                XLOG("%s: ENTRY NOT IN HTABLE ?", __FUNCTION__);
1595                return;
1596            }
1597            e = e->mru_next;
1598            _cache_remove_p(cache, lookup);
1599        } else {
1600            e = e->mru_next;
1601        }
1602    }
1603}
1604
1605ResolvCacheStatus
1606_resolv_cache_lookup( unsigned              netid,
1607                      const void*           query,
1608                      int                   querylen,
1609                      void*                 answer,
1610                      int                   answersize,
1611                      int                  *answerlen )
1612{
1613    Entry      key[1];
1614    Entry**    lookup;
1615    Entry*     e;
1616    time_t     now;
1617    Cache*     cache;
1618
1619    ResolvCacheStatus  result = RESOLV_CACHE_NOTFOUND;
1620
1621    XLOG("%s: lookup", __FUNCTION__);
1622    XLOG_QUERY(query, querylen);
1623
1624    /* we don't cache malformed queries */
1625    if (!entry_init_key(key, query, querylen)) {
1626        XLOG("%s: unsupported query", __FUNCTION__);
1627        return RESOLV_CACHE_UNSUPPORTED;
1628    }
1629    /* lookup cache */
1630    pthread_once(&_res_cache_once, _res_cache_init);
1631    pthread_mutex_lock(&_res_cache_list_lock);
1632
1633    cache = _find_named_cache_locked(netid);
1634    if (cache == NULL) {
1635        result = RESOLV_CACHE_UNSUPPORTED;
1636        goto Exit;
1637    }
1638
1639    /* see the description of _lookup_p to understand this.
1640     * the function always return a non-NULL pointer.
1641     */
1642    lookup = _cache_lookup_p(cache, key);
1643    e      = *lookup;
1644
1645    if (e == NULL) {
1646        XLOG( "NOT IN CACHE");
1647        // calling thread will wait if an outstanding request is found
1648        // that matching this query
1649        if (!_cache_check_pending_request_locked(&cache, key, netid) || cache == NULL) {
1650            goto Exit;
1651        } else {
1652            lookup = _cache_lookup_p(cache, key);
1653            e = *lookup;
1654            if (e == NULL) {
1655                goto Exit;
1656            }
1657        }
1658    }
1659
1660    now = _time_now();
1661
1662    /* remove stale entries here */
1663    if (now >= e->expires) {
1664        XLOG( " NOT IN CACHE (STALE ENTRY %p DISCARDED)", *lookup );
1665        XLOG_QUERY(e->query, e->querylen);
1666        _cache_remove_p(cache, lookup);
1667        goto Exit;
1668    }
1669
1670    *answerlen = e->answerlen;
1671    if (e->answerlen > answersize) {
1672        /* NOTE: we return UNSUPPORTED if the answer buffer is too short */
1673        result = RESOLV_CACHE_UNSUPPORTED;
1674        XLOG(" ANSWER TOO LONG");
1675        goto Exit;
1676    }
1677
1678    memcpy( answer, e->answer, e->answerlen );
1679
1680    /* bump up this entry to the top of the MRU list */
1681    if (e != cache->mru_list.mru_next) {
1682        entry_mru_remove( e );
1683        entry_mru_add( e, &cache->mru_list );
1684    }
1685
1686    XLOG( "FOUND IN CACHE entry=%p", e );
1687    result = RESOLV_CACHE_FOUND;
1688
1689Exit:
1690    pthread_mutex_unlock(&_res_cache_list_lock);
1691    return result;
1692}
1693
1694
1695void
1696_resolv_cache_add( unsigned              netid,
1697                   const void*           query,
1698                   int                   querylen,
1699                   const void*           answer,
1700                   int                   answerlen )
1701{
1702    Entry    key[1];
1703    Entry*   e;
1704    Entry**  lookup;
1705    u_long   ttl;
1706    Cache*   cache = NULL;
1707
1708    /* don't assume that the query has already been cached
1709     */
1710    if (!entry_init_key( key, query, querylen )) {
1711        XLOG( "%s: passed invalid query ?", __FUNCTION__);
1712        return;
1713    }
1714
1715    pthread_mutex_lock(&_res_cache_list_lock);
1716
1717    cache = _find_named_cache_locked(netid);
1718    if (cache == NULL) {
1719        goto Exit;
1720    }
1721
1722    XLOG( "%s: query:", __FUNCTION__ );
1723    XLOG_QUERY(query,querylen);
1724    XLOG_ANSWER(answer, answerlen);
1725#if DEBUG_DATA
1726    XLOG( "answer:");
1727    XLOG_BYTES(answer,answerlen);
1728#endif
1729
1730    lookup = _cache_lookup_p(cache, key);
1731    e      = *lookup;
1732
1733    if (e != NULL) { /* should not happen */
1734        XLOG("%s: ALREADY IN CACHE (%p) ? IGNORING ADD",
1735             __FUNCTION__, e);
1736        goto Exit;
1737    }
1738
1739    if (cache->num_entries >= cache->max_entries) {
1740        _cache_remove_expired(cache);
1741        if (cache->num_entries >= cache->max_entries) {
1742            _cache_remove_oldest(cache);
1743        }
1744        /* need to lookup again */
1745        lookup = _cache_lookup_p(cache, key);
1746        e      = *lookup;
1747        if (e != NULL) {
1748            XLOG("%s: ALREADY IN CACHE (%p) ? IGNORING ADD",
1749                __FUNCTION__, e);
1750            goto Exit;
1751        }
1752    }
1753
1754    ttl = answer_getTTL(answer, answerlen);
1755    if (ttl > 0) {
1756        e = entry_alloc(key, answer, answerlen);
1757        if (e != NULL) {
1758            e->expires = ttl + _time_now();
1759            _cache_add_p(cache, lookup, e);
1760        }
1761    }
1762#if DEBUG
1763    _cache_dump_mru(cache);
1764#endif
1765Exit:
1766    if (cache != NULL) {
1767      _cache_notify_waiting_tid_locked(cache, key);
1768    }
1769    pthread_mutex_unlock(&_res_cache_list_lock);
1770}
1771
1772/****************************************************************************/
1773/****************************************************************************/
1774/*****                                                                  *****/
1775/*****                                                                  *****/
1776/*****                                                                  *****/
1777/****************************************************************************/
1778/****************************************************************************/
1779
1780// Head of the list of caches.  Protected by _res_cache_list_lock.
1781static struct resolv_cache_info _res_cache_list;
1782
1783/* insert resolv_cache_info into the list of resolv_cache_infos */
1784static void _insert_cache_info_locked(struct resolv_cache_info* cache_info);
1785/* creates a resolv_cache_info */
1786static struct resolv_cache_info* _create_cache_info( void );
1787/* gets a resolv_cache_info associated with a network, or NULL if not found */
1788static struct resolv_cache_info* _find_cache_info_locked(unsigned netid);
1789/* look up the named cache, and creates one if needed */
1790static struct resolv_cache* _get_res_cache_for_net_locked(unsigned netid);
1791/* empty the named cache */
1792static void _flush_cache_for_net_locked(unsigned netid);
1793/* empty the nameservers set for the named cache */
1794static void _free_nameservers_locked(struct resolv_cache_info* cache_info);
1795/* return 1 if the provided list of name servers differs from the list of name servers
1796 * currently attached to the provided cache_info */
1797static int _resolv_is_nameservers_equal_locked(struct resolv_cache_info* cache_info,
1798        const char** servers, int numservers);
1799
1800static void
1801_res_cache_init(void)
1802{
1803    const char*  env = getenv(CONFIG_ENV);
1804
1805    if (env && atoi(env) == 0) {
1806        /* the cache is disabled */
1807        return;
1808    }
1809
1810    memset(&_res_cache_list, 0, sizeof(_res_cache_list));
1811    pthread_mutex_init(&_res_cache_list_lock, NULL);
1812}
1813
1814static struct resolv_cache*
1815_get_res_cache_for_net_locked(unsigned netid)
1816{
1817    struct resolv_cache* cache = _find_named_cache_locked(netid);
1818    if (!cache) {
1819        struct resolv_cache_info* cache_info = _create_cache_info();
1820        if (cache_info) {
1821            cache = _resolv_cache_create();
1822            if (cache) {
1823                cache_info->cache = cache;
1824                cache_info->netid = netid;
1825                _insert_cache_info_locked(cache_info);
1826            } else {
1827                free(cache_info);
1828            }
1829        }
1830    }
1831    return cache;
1832}
1833
1834void
1835_resolv_flush_cache_for_net(unsigned netid)
1836{
1837    pthread_once(&_res_cache_once, _res_cache_init);
1838    pthread_mutex_lock(&_res_cache_list_lock);
1839
1840    _flush_cache_for_net_locked(netid);
1841
1842    pthread_mutex_unlock(&_res_cache_list_lock);
1843}
1844
1845static void
1846_flush_cache_for_net_locked(unsigned netid)
1847{
1848    struct resolv_cache* cache = _find_named_cache_locked(netid);
1849    if (cache) {
1850        _cache_flush_locked(cache);
1851    }
1852}
1853
1854void _resolv_delete_cache_for_net(unsigned netid)
1855{
1856    pthread_once(&_res_cache_once, _res_cache_init);
1857    pthread_mutex_lock(&_res_cache_list_lock);
1858
1859    struct resolv_cache_info* prev_cache_info = &_res_cache_list;
1860
1861    while (prev_cache_info->next) {
1862        struct resolv_cache_info* cache_info = prev_cache_info->next;
1863
1864        if (cache_info->netid == netid) {
1865            prev_cache_info->next = cache_info->next;
1866            _cache_flush_locked(cache_info->cache);
1867            free(cache_info->cache->entries);
1868            free(cache_info->cache);
1869            _free_nameservers_locked(cache_info);
1870            free(cache_info);
1871            break;
1872        }
1873
1874        prev_cache_info = prev_cache_info->next;
1875    }
1876
1877    pthread_mutex_unlock(&_res_cache_list_lock);
1878}
1879
1880static struct resolv_cache_info*
1881_create_cache_info(void)
1882{
1883    struct resolv_cache_info* cache_info;
1884
1885    cache_info = calloc(sizeof(*cache_info), 1);
1886    return cache_info;
1887}
1888
1889static void
1890_insert_cache_info_locked(struct resolv_cache_info* cache_info)
1891{
1892    struct resolv_cache_info* last;
1893
1894    for (last = &_res_cache_list; last->next; last = last->next);
1895
1896    last->next = cache_info;
1897
1898}
1899
1900static struct resolv_cache*
1901_find_named_cache_locked(unsigned netid) {
1902
1903    struct resolv_cache_info* info = _find_cache_info_locked(netid);
1904
1905    if (info != NULL) return info->cache;
1906
1907    return NULL;
1908}
1909
1910static struct resolv_cache_info*
1911_find_cache_info_locked(unsigned netid)
1912{
1913    struct resolv_cache_info* cache_info = _res_cache_list.next;
1914
1915    while (cache_info) {
1916        if (cache_info->netid == netid) {
1917            break;
1918        }
1919
1920        cache_info = cache_info->next;
1921    }
1922    return cache_info;
1923}
1924
1925void
1926_resolv_set_nameservers_for_net(unsigned netid, const char** servers, int numservers,
1927        const char *domains)
1928{
1929    int i, rt, index;
1930    struct addrinfo hints;
1931    char sbuf[NI_MAXSERV];
1932    register char *cp;
1933    int *offset;
1934
1935    pthread_once(&_res_cache_once, _res_cache_init);
1936    pthread_mutex_lock(&_res_cache_list_lock);
1937
1938    // creates the cache if not created
1939    _get_res_cache_for_net_locked(netid);
1940
1941    struct resolv_cache_info* cache_info = _find_cache_info_locked(netid);
1942
1943    if (cache_info != NULL &&
1944            !_resolv_is_nameservers_equal_locked(cache_info, servers, numservers)) {
1945        // free current before adding new
1946        _free_nameservers_locked(cache_info);
1947
1948        memset(&hints, 0, sizeof(hints));
1949        hints.ai_family = PF_UNSPEC;
1950        hints.ai_socktype = SOCK_DGRAM; /*dummy*/
1951        hints.ai_flags = AI_NUMERICHOST;
1952        sprintf(sbuf, "%u", NAMESERVER_PORT);
1953
1954        index = 0;
1955        for (i = 0; i < numservers && i < MAXNS; i++) {
1956            rt = getaddrinfo(servers[i], sbuf, &hints, &cache_info->nsaddrinfo[index]);
1957            if (rt == 0) {
1958                cache_info->nameservers[index] = strdup(servers[i]);
1959                index++;
1960                XLOG("%s: netid = %u, addr = %s\n", __FUNCTION__, netid, servers[i]);
1961            } else {
1962                cache_info->nsaddrinfo[index] = NULL;
1963            }
1964        }
1965
1966        // code moved from res_init.c, load_domain_search_list
1967        strlcpy(cache_info->defdname, domains, sizeof(cache_info->defdname));
1968        if ((cp = strchr(cache_info->defdname, '\n')) != NULL)
1969            *cp = '\0';
1970        cp = cache_info->defdname;
1971        offset = cache_info->dnsrch_offset;
1972        while (offset < cache_info->dnsrch_offset + MAXDNSRCH) {
1973            while (*cp == ' ' || *cp == '\t') /* skip leading white space */
1974                cp++;
1975            if (*cp == '\0') /* stop if nothing more to do */
1976                break;
1977            *offset++ = cp - cache_info->defdname; /* record this search domain */
1978            while (*cp) { /* zero-terminate it */
1979                if (*cp == ' '|| *cp == '\t') {
1980                    *cp++ = '\0';
1981                    break;
1982                }
1983                cp++;
1984            }
1985        }
1986        *offset = -1; /* cache_info->dnsrch_offset has MAXDNSRCH+1 items */
1987
1988        // flush cache since new settings
1989        _flush_cache_for_net_locked(netid);
1990
1991    }
1992
1993    pthread_mutex_unlock(&_res_cache_list_lock);
1994}
1995
1996static int
1997_resolv_is_nameservers_equal_locked(struct resolv_cache_info* cache_info,
1998        const char** servers, int numservers)
1999{
2000    int i;
2001    char** ns;
2002    int currentservers;
2003    int equal = 1;
2004
2005    if (numservers > MAXNS) numservers = MAXNS;
2006
2007    // Find out how many nameservers we had before.
2008    currentservers = 0;
2009    for (ns = cache_info->nameservers; *ns; ns++)
2010        currentservers++;
2011
2012    if (currentservers != numservers)
2013        return 0;
2014
2015    // Compare each name server against current name servers.
2016    // TODO: this is incorrect if the list of current or previous nameservers
2017    // contains duplicates. This does not really matter because the framework
2018    // filters out duplicates, but we should probably fix it. It's also
2019    // insensitive to the order of the nameservers; we should probably fix that
2020    // too.
2021    for (i = 0; i < numservers && equal; i++) {
2022        ns = cache_info->nameservers;
2023        equal = 0;
2024        while(*ns) {
2025            if (strcmp(*ns, servers[i]) == 0) {
2026                equal = 1;
2027                break;
2028            }
2029            ns++;
2030        }
2031    }
2032
2033    return equal;
2034}
2035
2036static void
2037_free_nameservers_locked(struct resolv_cache_info* cache_info)
2038{
2039    int i;
2040    for (i = 0; i <= MAXNS; i++) {
2041        free(cache_info->nameservers[i]);
2042        cache_info->nameservers[i] = NULL;
2043        if (cache_info->nsaddrinfo[i] != NULL) {
2044            freeaddrinfo(cache_info->nsaddrinfo[i]);
2045            cache_info->nsaddrinfo[i] = NULL;
2046        }
2047    }
2048}
2049
2050void
2051_resolv_populate_res_for_net(res_state statp)
2052{
2053    if (statp == NULL) {
2054        return;
2055    }
2056
2057    pthread_once(&_res_cache_once, _res_cache_init);
2058    pthread_mutex_lock(&_res_cache_list_lock);
2059
2060    struct resolv_cache_info* info = _find_cache_info_locked(statp->netid);
2061    if (info != NULL) {
2062        int nserv;
2063        struct addrinfo* ai;
2064        XLOG("%s: %u\n", __FUNCTION__, statp->netid);
2065        for (nserv = 0; nserv < MAXNS; nserv++) {
2066            ai = info->nsaddrinfo[nserv];
2067            if (ai == NULL) {
2068                break;
2069            }
2070
2071            if ((size_t) ai->ai_addrlen <= sizeof(statp->_u._ext.ext->nsaddrs[0])) {
2072                if (statp->_u._ext.ext != NULL) {
2073                    memcpy(&statp->_u._ext.ext->nsaddrs[nserv], ai->ai_addr, ai->ai_addrlen);
2074                    statp->nsaddr_list[nserv].sin_family = AF_UNSPEC;
2075                } else {
2076                    if ((size_t) ai->ai_addrlen
2077                            <= sizeof(statp->nsaddr_list[0])) {
2078                        memcpy(&statp->nsaddr_list[nserv], ai->ai_addr,
2079                                ai->ai_addrlen);
2080                    } else {
2081                        statp->nsaddr_list[nserv].sin_family = AF_UNSPEC;
2082                    }
2083                }
2084            } else {
2085                XLOG("%s: found too long addrlen", __FUNCTION__);
2086            }
2087        }
2088        statp->nscount = nserv;
2089        // now do search domains.  Note that we cache the offsets as this code runs alot
2090        // but the setting/offset-computer only runs when set/changed
2091        strlcpy(statp->defdname, info->defdname, sizeof(statp->defdname));
2092        register char **pp = statp->dnsrch;
2093        register int *p = info->dnsrch_offset;
2094        while (pp < statp->dnsrch + MAXDNSRCH && *p != -1) {
2095            *pp++ = &statp->defdname[0] + *p++;
2096        }
2097    }
2098    pthread_mutex_unlock(&_res_cache_list_lock);
2099}
2100