/*
 * Copyright (C) 2008 The Android Open Source Project
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *  * Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *  * Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
 * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
 * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include "resolv_cache.h"
#include <resolv.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <sys/time.h>   /* gettimeofday() */
#include "pthread.h"

#include <errno.h>
#include "arpa_nameser.h"
#include <sys/system_properties.h>
#include <net/if.h>
#include <netdb.h>
#include <linux/if.h>

#include <arpa/inet.h>
#include "resolv_private.h"
#include "resolv_iface.h"

/* This code implements a small and *simple* DNS resolver cache.
 *
 * It is only used to cache DNS answers for a time defined by the smallest TTL
 * among the answer records in order to reduce DNS traffic. It is not supposed
 * to be a full DNS cache, since we plan to implement that in the future in a
 * dedicated process running on the system.
 *
 * Note that its design is kept simple very intentionally, i.e.:
 *
 *  - it takes raw DNS query packet data as input, and returns raw DNS
 *    answer packet data as output
 *
 *    (this means that two similar queries that encode the DNS name
 *     differently will be treated distinctly).
 *
 *    the smallest TTL value among the answer records is used as the time
 *    to keep an answer in the cache.
 *
 *    this is bad, but we absolutely want to avoid parsing the answer packets
 *    (and should be solved by the later full DNS cache process).
 *
 *  - the implementation is just a (query-data) => (answer-data) hash table
 *    with a trivial least-recently-used expiration policy.
 *
 * Doing this keeps the code simple and avoids having to deal with a lot of
 * things that a full DNS cache is expected to do.
 *
 * The API is also very simple:
 *
 *   - the client calls __get_res_cache() to obtain a handle to the cache.
 *     this will initialize the cache on first usage. the result can be NULL
 *     if the cache is disabled.
 *
 *   - the client calls _resolv_cache_lookup() before performing a query
 *
 *     if the function returns RESOLV_CACHE_FOUND, a copy of the answer data
 *     has been copied into the client-provided answer buffer.
 *
 *     if the function returns RESOLV_CACHE_NOTFOUND, the client should perform
 *     a request normally, *then* call _resolv_cache_add() to add the received
 *     answer to the cache.
 *
 *     if the function returns RESOLV_CACHE_UNSUPPORTED, the client should
 *     perform a request normally, and *not* call _resolv_cache_add()
 *
 *     note that RESOLV_CACHE_UNSUPPORTED is also returned if the answer buffer
 *     is too short to accommodate the cached result.
 *
 *  - when network settings change, the cache must be flushed since the list
 *    of DNS servers probably changed. this is done by calling
 *    _resolv_cache_reset()
 *
 *    the parameter to this function must be an ever-increasing generation
 *    number corresponding to the current network settings state.
 *
 *    This is done because several threads could detect the same network
 *    settings change (but at different times) and will all end up calling the
 *    same function. Comparing with the last used generation number ensures
 *    that the cache is only flushed once per network change.
 */
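
/* A minimal sketch of the intended client flow, for illustration only
 * ('send_query_to_server' is a hypothetical placeholder for the caller's
 * actual network code, not something defined in this file):
 *
 *     struct resolv_cache*  cache  = __get_res_cache();
 *     int                   alen   = 0;
 *     ResolvCacheStatus     status = RESOLV_CACHE_UNSUPPORTED;
 *
 *     if (cache != NULL)
 *         status = _resolv_cache_lookup(cache, query, querylen,
 *                                       answer, answersize, &alen);
 *
 *     if (status == RESOLV_CACHE_FOUND)
 *         return alen;                    // answer was copied from the cache
 *
 *     alen = send_query_to_server(query, querylen, answer, answersize);
 *
 *     if (status == RESOLV_CACHE_NOTFOUND) {
 *         if (alen > 0)
 *             _resolv_cache_add(cache, query, querylen, answer, alen);
 *         else
 *             _resolv_cache_query_failed(cache, query, querylen);
 *     }
 */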

/* the name of an environment variable that is checked the first time this
 * code is called. if its value is "0", the resolver cache is disabled.
 */
#define  CONFIG_ENV  "BIONIC_DNSCACHE"

/* entries older than CONFIG_SECONDS seconds are always discarded.
 */
#define  CONFIG_SECONDS    (60*10)    /* 10 minutes */

/* default number of entries kept in the cache. This value has been
 * determined by browsing through various sites and counting the number
 * of corresponding requests. Keep in mind that our framework is currently
 * performing two requests per name lookup (one for IPv4, the other for IPv6)
 *
 *    www.google.com      4
 *    www.ysearch.com     6
 *    www.amazon.com      8
 *    www.nytimes.com     22
 *    www.espn.com        28
 *    www.msn.com         28
 *    www.lemonde.fr      35
 *
 * (determined on 2009-2-17 from Paris, France; results may vary depending
 *  on location)
 *
 * most high-level websites use lots of media/ad servers with different names
 * but these are generally reused when browsing through the site.
 *
 * As such, a value of 64 should be relatively comfortable at the moment.
 *
 * The system property ro.net.dns_cache_size can be used to override the
 * default value with a custom one.
 *
 *
 * ******************************************
 * * NOTE - this has changed.
 * * 1) we've added IPv6 support so each dns query results in 2 responses
 * * 2) we've made this a system-wide cache, so the cost is less (it's not
 * *    duplicated in each process) and the need is greater (more processes
 * *    making different requests).
 * * Upping by 2x for IPv6
 * * Upping by another 5x for the centralized nature
 * *****************************************
 */
#define  CONFIG_MAX_ENTRIES    (64 * 2 * 5)
/* name of the system property that can be used to set the cache size */
#define  DNS_CACHE_SIZE_PROP_NAME   "ro.net.dns_cache_size"

/****************************************************************************/
/****************************************************************************/
/*****                                                                  *****/
/*****                                                                  *****/
/*****                                                                  *****/
/****************************************************************************/
/****************************************************************************/

/* set to 1 to debug cache operations */
#define  DEBUG       0

/* set to 1 to debug query data */
#define  DEBUG_DATA  0

#undef XLOG
#if DEBUG
#  include <logd.h>
#  define  XLOG(...)   \
    __libc_android_log_print(ANDROID_LOG_DEBUG,"libc",__VA_ARGS__)

#include <stdio.h>
#include <stdarg.h>

/** BOUNDED BUFFER FORMATTING
 **/

/* technical note:
 *
 *   the following debugging routines are used to append data to a bounded
 *   buffer. they take two parameters that are:
 *
 *   - p : a pointer to the current cursor position in the buffer
 *         this value is initially set to the buffer's address.
 *
 *   - end : the address of the buffer's limit, i.e. of the first byte
 *           after the buffer. this address should never be touched.
 *
 *           IMPORTANT: it is assumed that end > buffer_address, i.e.
 *                      that the buffer is at least one byte long.
 *
 *   the _bprint_() functions return the new value of 'p' after the data
 *   has been appended, and also ensure the following:
 *
 *   - the returned value will never be strictly greater than 'end'
 *
 *   - a return value equal to 'end' means that truncation occurred
 *     (in which case, end[-1] will be set to 0)
 *
 *   - after returning from a _bprint_() function, the content of the buffer
 *     is always 0-terminated, even in the event of truncation.
 *
 *  these conventions allow you to call _bprint_ functions multiple times and
 *  only check for truncation at the end of the sequence, as in:
 *
 *     char  buff[1000], *p = buff, *end = p + sizeof(buff);
 *
 *     p = _bprint_c(p, end, '"');
 *     p = _bprint_s(p, end, my_string);
 *     p = _bprint_c(p, end, '"');
 *
 *     if (p >= end) {
 *        // buffer was too small
 *     }
 *
 *     printf( "%s", buff );
 */

/* add a char to a bounded buffer */
static char*
_bprint_c( char*  p, char*  end, int  c )
{
    if (p < end) {
        if (p+1 == end)
            *p++ = 0;
        else {
            *p++ = (char) c;
            *p   = 0;
        }
    }
    return p;
}

/* add a sequence of bytes to a bounded buffer */
static char*
_bprint_b( char*  p, char*  end, const char*  buf, int  len )
{
    int  avail = end - p;

    if (avail <= 0 || len <= 0)
        return p;

    if (avail > len)
        avail = len;

    memcpy( p, buf, avail );
    p += avail;

    if (p < end)
        p[0] = 0;
    else
        end[-1] = 0;

    return p;
}

/* add a string to a bounded buffer */
static char*
_bprint_s( char*  p, char*  end, const char*  str )
{
    return _bprint_b(p, end, str, strlen(str));
}

/* add a formatted string to a bounded buffer */
static char*
_bprint( char*  p, char*  end, const char*  format, ... )
{
    int      avail, n;
    va_list  args;

    avail = end - p;

    if (avail <= 0)
        return p;

    va_start(args, format);
    n = vsnprintf( p, avail, format, args);
    va_end(args);

    /* certain C libraries return -1 in case of truncation */
    if (n < 0 || n > avail)
        n = avail;

    p += n;
    /* certain C libraries do not zero-terminate in case of truncation */
    if (p == end)
        p[-1] = 0;

    return p;
}

/* add a hex value to a bounded buffer, up to 8 digits */
static char*
_bprint_hex( char*  p, char*  end, unsigned  value, int  numDigits )
{
    char   text[sizeof(unsigned)*2];
    int    nn = 0;

    while (numDigits-- > 0) {
        text[nn++] = "0123456789abcdef"[(value >> (numDigits*4)) & 15];
    }
    return _bprint_b(p, end, text, nn);
}

/* add the hexadecimal dump of some memory area to a bounded buffer */
static char*
_bprint_hexdump( char*  p, char*  end, const uint8_t*  data, int  datalen )
{
    int   lineSize = 16;

    while (datalen > 0) {
        int  avail = datalen;
        int  nn;

        if (avail > lineSize)
            avail = lineSize;

        for (nn = 0; nn < avail; nn++) {
            if (nn > 0)
                p = _bprint_c(p, end, ' ');
            p = _bprint_hex(p, end, data[nn], 2);
        }
        for ( ; nn < lineSize; nn++ ) {
            p = _bprint_s(p, end, "   ");
        }
        p = _bprint_s(p, end, "  ");

        for (nn = 0; nn < avail; nn++) {
            int  c = data[nn];

            if (c < 32 || c > 127)
                c = '.';

            p = _bprint_c(p, end, c);
        }
        p = _bprint_c(p, end, '\n');

        data    += avail;
        datalen -= avail;
    }
    return p;
}

/* dump the content of a query packet to the log */
static void
XLOG_BYTES( const void*  base, int  len )
{
    char  buff[1024];
    char*  p = buff, *end = p + sizeof(buff);

    p = _bprint_hexdump(p, end, base, len);
    XLOG("%s",buff);
}

#else /* !DEBUG */
#  define  XLOG(...)        ((void)0)
#  define  XLOG_BYTES(a,b)  ((void)0)
#endif

static time_t
_time_now( void )
{
    struct timeval  tv;

    gettimeofday( &tv, NULL );
    return tv.tv_sec;
}

/* reminder: the general format of a DNS packet is the following:
 *
 *    HEADER  (12 bytes)
 *    QUESTION  (variable)
 *    ANSWER (variable)
 *    AUTHORITY (variable)
 *    ADDITIONAL (variable)
 *
 * the HEADER is made of:
 *
 *   ID     : 16 : 16-bit unique query identification field
 *
 *   QR     :  1 : set to 0 for queries, and 1 for responses
 *   Opcode :  4 : set to 0 for queries
 *   AA     :  1 : set to 0 for queries
 *   TC     :  1 : truncation flag, will be set to 0 in queries
 *   RD     :  1 : recursion desired
 *
 *   RA     :  1 : recursion available (0 in queries)
 *   Z      :  3 : three reserved zero bits
 *   RCODE  :  4 : response code (always 0=NOERROR in queries)
 *
 *   QDCount: 16 : question count
 *   ANCount: 16 : Answer count (0 in queries)
 *   NSCount: 16 : Authority Record count (0 in queries)
 *   ARCount: 16 : Additional Record count (0 in queries)
 *
 * the QUESTION is made of QDCount Question Records (QRs)
 * the ANSWER is made of ANCount RRs
 * the AUTHORITY is made of NSCount RRs
 * the ADDITIONAL is made of ARCount RRs
 *
 * Each Question Record (QR) is made of:
 *
 *   QNAME   : variable : Query DNS NAME
 *   TYPE    : 16       : type of query (A=1, PTR=12, MX=15, AAAA=28, ALL=255)
 *   CLASS   : 16       : class of query (IN=1)
 *
 * Each Resource Record (RR) is made of:
 *
 *   NAME    : variable : DNS NAME
 *   TYPE    : 16       : type of query (A=1, PTR=12, MX=15, AAAA=28, ALL=255)
 *   CLASS   : 16       : class of query (IN=1)
 *   TTL     : 32       : seconds to cache this RR (0=none)
 *   RDLENGTH: 16       : size of RDDATA in bytes
 *   RDDATA  : variable : RR data (depends on TYPE)
 *
 * Each QNAME contains a domain name encoded as a sequence of 'labels'
 * terminated by a zero. Each label has the following format:
 *
 *    LEN  : 8     : length of the label (MUST be < 64)
 *    NAME : 8*LEN : the label's characters (dots are not encoded)
 *
 * A value of 0 in the encoding is interpreted as the 'root' domain and
 * terminates the encoding. So 'www.android.com' will be encoded as:
 *
 *   <3>www<7>android<3>com<0>
 *
 * Where <n> represents the byte with value 'n'
 *
 * Each NAME reflects the QNAME of the question, but has a slightly more
 * complex encoding in order to provide message compression. This is achieved
 * by using a 2-byte pointer, with format:
 *
 *    TYPE   : 2  : 0b11 to indicate a pointer, 0b01 and 0b10 are reserved
 *    OFFSET : 14 : offset to another part of the DNS packet
 *
 * The offset is relative to the start of the DNS packet and must point
 * to a previously encoded domain name. A pointer terminates the encoding.
 *
 * The NAME can be encoded in one of the following formats:
 *
 *   - a sequence of simple labels terminated by 0 (like QNAMEs)
 *   - a single pointer
 *   - a sequence of simple labels terminated by a pointer
 *
 * A pointer shall always point to either a pointer or a sequence of
 * labels (which can themselves be terminated by either a 0 or a pointer)
 *
 * The expanded length of a given domain name should not exceed 255 bytes.
 *
 * NOTE: we don't parse the answer packets, so don't need to deal with NAME
 *       records, only QNAMEs.
 */
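
/* As a concrete illustration (not used by the code), a recursive IN/A query
 * for 'www.android.com' with ID 0x1234 would be laid out as follows:
 *
 *    0x12 0x34                  ID
 *    0x01 0x00                  flags: QR=0, Opcode=0, AA=0, TC=0, RD=1,
 *                                      RA=0, Z=0, RCODE=0
 *    0x00 0x01                  QDCOUNT = 1
 *    0x00 0x00                  ANCOUNT = 0
 *    0x00 0x00                  NSCOUNT = 0
 *    0x00 0x00                  ARCOUNT = 0
 *    <3>www<7>android<3>com<0>  QNAME
 *    0x00 0x01                  TYPE  = A
 *    0x00 0x01                  CLASS = IN
 *
 * i.e. 12 header bytes followed by a single Question Record and nothing else,
 * which is exactly the shape that _dnsPacket_checkQuery() below accepts.
 */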

#define  DNS_HEADER_SIZE  12

#define  DNS_TYPE_A   "\00\01"   /* big-endian decimal 1 */
#define  DNS_TYPE_PTR "\00\014"  /* big-endian decimal 12 */
#define  DNS_TYPE_MX  "\00\017"  /* big-endian decimal 15 */
#define  DNS_TYPE_AAAA "\00\034" /* big-endian decimal 28 */
#define  DNS_TYPE_ALL "\00\377"  /* big-endian decimal 255 (octal 0377) */

#define  DNS_CLASS_IN "\00\01"   /* big-endian decimal 1 */

typedef struct {
    const uint8_t*  base;
    const uint8_t*  end;
    const uint8_t*  cursor;
} DnsPacket;

static void
_dnsPacket_init( DnsPacket*  packet, const uint8_t*  buff, int  bufflen )
{
    packet->base   = buff;
    packet->end    = buff + bufflen;
    packet->cursor = buff;
}

static void
_dnsPacket_rewind( DnsPacket*  packet )
{
    packet->cursor = packet->base;
}

static void
_dnsPacket_skip( DnsPacket*  packet, int  count )
{
    const uint8_t*  p = packet->cursor + count;

    if (p > packet->end)
        p = packet->end;

    packet->cursor = p;
}

static int
_dnsPacket_readInt16( DnsPacket*  packet )
{
    const uint8_t*  p = packet->cursor;

    if (p+2 > packet->end)
        return -1;

    packet->cursor = p+2;
    return (p[0]<< 8) | p[1];
}

/** QUERY CHECKING
 **/

/* check bytes in a dns packet. returns 1 on success, 0 on failure.
 * the cursor is only advanced in the case of success
 */
static int
_dnsPacket_checkBytes( DnsPacket*  packet, int  numBytes, const void*  bytes )
{
    const uint8_t*  p = packet->cursor;

    if (p + numBytes > packet->end)
        return 0;

    if (memcmp(p, bytes, numBytes) != 0)
        return 0;

    packet->cursor = p + numBytes;
    return 1;
}

/* parse and skip a given QNAME stored in a query packet,
 * from the current cursor position. returns 1 on success,
 * or 0 for malformed data.
 */
static int
_dnsPacket_checkQName( DnsPacket*  packet )
{
    const uint8_t*  p   = packet->cursor;
    const uint8_t*  end = packet->end;

    for (;;) {
        int  c;

        if (p >= end)
            break;

        c = *p++;

        if (c == 0) {
            packet->cursor = p;
            return 1;
        }

        /* we don't expect label compression in QNAMEs */
        if (c >= 64)
            break;

        p += c;
        /* we rely on the bound check at the start
         * of the loop here */
    }
    /* malformed data */
    XLOG("malformed QNAME");
    return 0;
}

/* parse and skip a given QR stored in a packet.
 * returns 1 on success, and 0 on failure
 */
static int
_dnsPacket_checkQR( DnsPacket*  packet )
{
    if (!_dnsPacket_checkQName(packet))
        return 0;

    /* TYPE must be one of the things we support */
    if (!_dnsPacket_checkBytes(packet, 2, DNS_TYPE_A) &&
        !_dnsPacket_checkBytes(packet, 2, DNS_TYPE_PTR) &&
        !_dnsPacket_checkBytes(packet, 2, DNS_TYPE_MX) &&
        !_dnsPacket_checkBytes(packet, 2, DNS_TYPE_AAAA) &&
        !_dnsPacket_checkBytes(packet, 2, DNS_TYPE_ALL))
    {
        XLOG("unsupported TYPE");
        return 0;
    }
    /* CLASS must be IN */
    if (!_dnsPacket_checkBytes(packet, 2, DNS_CLASS_IN)) {
        XLOG("unsupported CLASS");
        return 0;
    }

    return 1;
}

/* check the header of a DNS Query packet, return 1 if it is one
 * type of query we can cache, or 0 otherwise
 */
static int
_dnsPacket_checkQuery( DnsPacket*  packet )
{
    const uint8_t*  p = packet->base;
    int             qdCount, anCount, nsCount, arCount;

    if (p + DNS_HEADER_SIZE > packet->end) {
        XLOG("query packet too small");
        return 0;
    }

    /* QR must be set to 0, opcode must be 0 and AA must be 0 */
    /* RA, Z, and RCODE must be 0 */
    if ((p[2] & 0xFC) != 0 || p[3] != 0) {
        XLOG("query packet flags unsupported");
        return 0;
    }

    /* Note that we ignore the TC and RD bits here for the
     * following reasons:
     *
     * - there is no point for a query packet sent to a server
     *   to have the TC bit set, but the implementation might
     *   set the bit in the query buffer for its own needs
     *   between a _resolv_cache_lookup and a
     *   _resolv_cache_add. We should not freak out if this
     *   is the case.
     *
     * - we consider that the result from a RD=0 or a RD=1
     *   query might be different, hence that the RD bit
     *   should be used to differentiate cached results.
     *
     *   this implies that RD is checked when hashing or
     *   comparing query packets, but not TC
     */

    /* ANCOUNT, NSCOUNT and ARCOUNT must be 0 */
    qdCount = (p[4] << 8) | p[5];
    anCount = (p[6] << 8) | p[7];
    nsCount = (p[8] << 8) | p[9];
    arCount = (p[10]<< 8) | p[11];

    if (anCount != 0 || nsCount != 0 || arCount != 0) {
        XLOG("query packet contains non-query records");
        return 0;
    }

    if (qdCount == 0) {
        XLOG("query packet doesn't contain query record");
        return 0;
    }

    /* Check QDCOUNT QRs */
    packet->cursor = p + DNS_HEADER_SIZE;

    for (;qdCount > 0; qdCount--)
        if (!_dnsPacket_checkQR(packet))
            return 0;

    return 1;
}

/** QUERY DEBUGGING
 **/
#if DEBUG
static char*
_dnsPacket_bprintQName(DnsPacket*  packet, char*  bp, char*  bend)
{
    const uint8_t*  p   = packet->cursor;
    const uint8_t*  end = packet->end;
    int             first = 1;

    for (;;) {
        int  c;

        if (p >= end)
            break;

        c = *p++;

        if (c == 0) {
            packet->cursor = p;
            return bp;
        }

        /* we don't expect label compression in QNAMEs */
        if (c >= 64)
            break;

        if (first)
            first = 0;
        else
            bp = _bprint_c(bp, bend, '.');

        bp = _bprint_b(bp, bend, (const char*)p, c);

        p += c;
        /* we rely on the bound check at the start
         * of the loop here */
    }
    /* malformed data */
    bp = _bprint_s(bp, bend, "<MALFORMED>");
    return bp;
}

static char*
_dnsPacket_bprintQR(DnsPacket*  packet, char*  p, char*  end)
{
#define  QQ(x)   { DNS_TYPE_##x, #x }
    static const struct {
        const char*  typeBytes;
        const char*  typeString;
    } qTypes[] =
    {
        QQ(A), QQ(PTR), QQ(MX), QQ(AAAA), QQ(ALL),
        { NULL, NULL }
    };
    int          nn;
    const char*  typeString = NULL;

    /* dump QNAME */
    p = _dnsPacket_bprintQName(packet, p, end);

    /* dump TYPE */
    p = _bprint_s(p, end, " (");

    for (nn = 0; qTypes[nn].typeBytes != NULL; nn++) {
        if (_dnsPacket_checkBytes(packet, 2, qTypes[nn].typeBytes)) {
            typeString = qTypes[nn].typeString;
            break;
        }
    }

    if (typeString != NULL)
        p = _bprint_s(p, end, typeString);
    else {
        int  typeCode = _dnsPacket_readInt16(packet);
        p = _bprint(p, end, "UNKNOWN-%d", typeCode);
    }

    p = _bprint_c(p, end, ')');

    /* skip CLASS */
    _dnsPacket_skip(packet, 2);
    return p;
}

/* this function assumes the packet has already been checked */
static char*
_dnsPacket_bprintQuery( DnsPacket*  packet, char*  p, char*  end )
{
    int   qdCount;

    if (packet->base[2] & 0x1) {
        p = _bprint_s(p, end, "RECURSIVE ");
    }

    _dnsPacket_skip(packet, 4);
    qdCount = _dnsPacket_readInt16(packet);
    _dnsPacket_skip(packet, 6);

    for ( ; qdCount > 0; qdCount-- ) {
        p = _dnsPacket_bprintQR(packet, p, end);
    }
    return p;
}
#endif


/** QUERY HASHING SUPPORT
 **
 ** THE FOLLOWING CODE ASSUMES THAT THE INPUT PACKET HAS ALREADY
 ** BEEN SUCCESSFULLY CHECKED.
 **/

/* use the 32-bit FNV-1 hash function */
#define  FNV_MULT   16777619U
#define  FNV_BASIS  2166136261U
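
/* For reference, the plain FNV-1 recurrence that the helpers below apply is
 * (a sketch only, not used directly by the code):
 *
 *     unsigned hash = FNV_BASIS;
 *     for (i = 0; i < len; i++)
 *         hash = hash * FNV_MULT ^ bytes[i];
 *
 * the helpers feed this recurrence selectively: the RD bit and the
 * QNAME/TYPE/CLASS bytes of each question record contribute to the hash,
 * while the other header fields are skipped.
 */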

static unsigned
_dnsPacket_hashBytes( DnsPacket*  packet, int  numBytes, unsigned  hash )
{
    const uint8_t*  p   = packet->cursor;
    const uint8_t*  end = packet->end;

    while (numBytes > 0 && p < end) {
        hash = hash*FNV_MULT ^ *p++;
        numBytes -= 1;
    }
    packet->cursor = p;
    return hash;
}


static unsigned
_dnsPacket_hashQName( DnsPacket*  packet, unsigned  hash )
{
    const uint8_t*  p   = packet->cursor;
    const uint8_t*  end = packet->end;

    for (;;) {
        int  c;

        if (p >= end) {  /* should not happen */
            XLOG("%s: INTERNAL_ERROR: read-overflow !!\n", __FUNCTION__);
            break;
        }

        c = *p++;

        if (c == 0)
            break;

        if (c >= 64) {
            XLOG("%s: INTERNAL_ERROR: malformed domain !!\n", __FUNCTION__);
            break;
        }
        if (p + c >= end) {
            XLOG("%s: INTERNAL_ERROR: simple label read-overflow !!\n",
                    __FUNCTION__);
            break;
        }
        while (c > 0) {
            hash = hash*FNV_MULT ^ *p++;
            c   -= 1;
        }
    }
    packet->cursor = p;
    return hash;
}

static unsigned
_dnsPacket_hashQR( DnsPacket*  packet, unsigned  hash )
{
    hash = _dnsPacket_hashQName(packet, hash);
    hash = _dnsPacket_hashBytes(packet, 4, hash); /* TYPE and CLASS */
    return hash;
}

static unsigned
_dnsPacket_hashQuery( DnsPacket*  packet )
{
    unsigned  hash = FNV_BASIS;
    int       count;
    _dnsPacket_rewind(packet);

    /* we ignore the TC bit for reasons explained in
     * _dnsPacket_checkQuery().
     *
     * however we hash the RD bit to differentiate
     * between answers for recursive and non-recursive
     * queries.
     */
    hash = hash*FNV_MULT ^ (packet->base[2] & 1);

    /* assume: other flags are 0 */
    _dnsPacket_skip(packet, 4);

    /* read QDCOUNT */
    count = _dnsPacket_readInt16(packet);

    /* assume: ANcount, NScount, ARcount are 0 */
    _dnsPacket_skip(packet, 6);

    /* hash QDCOUNT QRs */
    for ( ; count > 0; count-- )
        hash = _dnsPacket_hashQR(packet, hash);

    return hash;
}


/** QUERY COMPARISON
 **
 ** THE FOLLOWING CODE ASSUMES THAT THE INPUT PACKETS HAVE ALREADY
 ** BEEN SUCCESSFULLY CHECKED.
 **/

static int
_dnsPacket_isEqualDomainName( DnsPacket*  pack1, DnsPacket*  pack2 )
{
    const uint8_t*  p1   = pack1->cursor;
    const uint8_t*  end1 = pack1->end;
    const uint8_t*  p2   = pack2->cursor;
    const uint8_t*  end2 = pack2->end;

    for (;;) {
        int  c1, c2;

        if (p1 >= end1 || p2 >= end2) {
            XLOG("%s: INTERNAL_ERROR: read-overflow !!\n", __FUNCTION__);
            break;
        }
        c1 = *p1++;
        c2 = *p2++;
        if (c1 != c2)
            break;

        if (c1 == 0) {
            pack1->cursor = p1;
            pack2->cursor = p2;
            return 1;
        }
        if (c1 >= 64) {
            XLOG("%s: INTERNAL_ERROR: malformed domain !!\n", __FUNCTION__);
            break;
        }
        if ((p1+c1 > end1) || (p2+c1 > end2)) {
            XLOG("%s: INTERNAL_ERROR: simple label read-overflow !!\n",
                    __FUNCTION__);
            break;
        }
        if (memcmp(p1, p2, c1) != 0)
            break;
        p1 += c1;
        p2 += c1;
        /* we rely on the bound checks at the start of the loop */
    }
    /* not the same, or one is malformed */
    XLOG("different DN");
    return 0;
}

static int
_dnsPacket_isEqualBytes( DnsPacket*  pack1, DnsPacket*  pack2, int  numBytes )
{
    const uint8_t*  p1 = pack1->cursor;
    const uint8_t*  p2 = pack2->cursor;

    if ( p1 + numBytes > pack1->end || p2 + numBytes > pack2->end )
        return 0;

    if ( memcmp(p1, p2, numBytes) != 0 )
        return 0;

    pack1->cursor += numBytes;
    pack2->cursor += numBytes;
    return 1;
}

static int
_dnsPacket_isEqualQR( DnsPacket*  pack1, DnsPacket*  pack2 )
{
    /* compare domain name encoding + TYPE + CLASS */
    if ( !_dnsPacket_isEqualDomainName(pack1, pack2) ||
         !_dnsPacket_isEqualBytes(pack1, pack2, 2+2) )
        return 0;

    return 1;
}

static int
_dnsPacket_isEqualQuery( DnsPacket*  pack1, DnsPacket*  pack2 )
{
    int  count1, count2;

    /* compare the headers, ignore most fields */
    _dnsPacket_rewind(pack1);
    _dnsPacket_rewind(pack2);

    /* compare RD, ignore TC, see comment in _dnsPacket_checkQuery */
    if ((pack1->base[2] & 1) != (pack2->base[2] & 1)) {
        XLOG("different RD");
        return 0;
    }

    /* assume: other flags are all 0 */
    _dnsPacket_skip(pack1, 4);
    _dnsPacket_skip(pack2, 4);

    /* compare QDCOUNT */
    count1 = _dnsPacket_readInt16(pack1);
    count2 = _dnsPacket_readInt16(pack2);
    if (count1 != count2 || count1 < 0) {
        XLOG("different QDCOUNT");
        return 0;
    }

    /* assume: ANcount, NScount and ARcount are all 0 */
    _dnsPacket_skip(pack1, 6);
    _dnsPacket_skip(pack2, 6);

    /* compare the QDCOUNT QRs */
    for ( ; count1 > 0; count1-- ) {
        if (!_dnsPacket_isEqualQR(pack1, pack2)) {
            XLOG("different QR");
            return 0;
        }
    }
    return 1;
}

/****************************************************************************/
/****************************************************************************/
/*****                                                                  *****/
/*****                                                                  *****/
/*****                                                                  *****/
/****************************************************************************/
/****************************************************************************/

/* cache entry. for simplicity, 'hash' and 'hlink' are inlined in this
 * structure though they are conceptually part of the hash table.
 *
 * similarly, mru_next and mru_prev are part of the global MRU list
 */
typedef struct Entry {
    unsigned int     hash;   /* hash value */
    struct Entry*    hlink;  /* next in collision chain */
    struct Entry*    mru_prev;
    struct Entry*    mru_next;

    const uint8_t*   query;
    int              querylen;
    const uint8_t*   answer;
    int              answerlen;
    time_t           expires;   /* time_t when the entry isn't valid any more */
    int              id;        /* for debugging purpose */
} Entry;
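
/* Each cache node is carved out of a single calloc() block (see entry_alloc()
 * below), with the query and answer bytes stored right after the structure:
 *
 *     +--------------+---------------------+----------------------+
 *     | struct Entry | query (querylen B)  | answer (answerlen B) |
 *     +--------------+---------------------+----------------------+
 *     ^e             ^e->query == (e+1)    ^e->answer
 *
 * which is why entry_free() only needs a single free() call. Entries used as
 * pure search keys (see entry_init_key()) instead point at caller-owned
 * buffers and are never passed to entry_free(). (Illustration only.)
 */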

/**
 * Parse the answer records and find the smallest
 * TTL among the answer records.
 *
 * The returned TTL is the number of seconds to
 * keep the answer in the cache.
 *
 * In case of parse error zero (0) is returned which
 * indicates that the answer shall not be cached.
 */
static u_long
answer_getTTL(const void* answer, int answerlen)
{
    ns_msg handle;
    int ancount, n;
    u_long result, ttl;
    ns_rr rr;

    result = 0;
    if (ns_initparse(answer, answerlen, &handle) >= 0) {
        // get number of answer records
        ancount = ns_msg_count(handle, ns_s_an);
        for (n = 0; n < ancount; n++) {
            if (ns_parserr(&handle, ns_s_an, n, &rr) == 0) {
                ttl = ns_rr_ttl(rr);
                if (n == 0 || ttl < result) {
                    result = ttl;
                }
            } else {
                XLOG("ns_parserr failed ancount no = %d. errno = %s\n", n, strerror(errno));
            }
        }
    } else {
        XLOG("ns_initparse failed. %s\n", strerror(errno));
    }

    XLOG("TTL = %lu\n", result);

    return result;
}

static void
entry_free( Entry*  e )
{
    /* everything is allocated in a single memory block */
    if (e) {
        free(e);
    }
}

static __inline__ void
entry_mru_remove( Entry*  e )
{
    e->mru_prev->mru_next = e->mru_next;
    e->mru_next->mru_prev = e->mru_prev;
}

static __inline__ void
entry_mru_add( Entry*  e, Entry*  list )
{
    Entry*  first = list->mru_next;

    e->mru_next = first;
    e->mru_prev = list;

    list->mru_next  = e;
    first->mru_prev = e;
}

/* compute the hash of a given entry, this is a hash of most
 * data in the query (key) */
static unsigned
entry_hash( const Entry*  e )
{
    DnsPacket  pack[1];

    _dnsPacket_init(pack, e->query, e->querylen);
    return _dnsPacket_hashQuery(pack);
}

/* initialize an Entry as a search key, this also checks the input query packet
 * returns 1 on success, or 0 in case of unsupported/malformed data */
static int
entry_init_key( Entry*  e, const void*  query, int  querylen )
{
    DnsPacket  pack[1];

    memset(e, 0, sizeof(*e));

    e->query    = query;
    e->querylen = querylen;
    e->hash     = entry_hash(e);

    _dnsPacket_init(pack, query, querylen);

    return _dnsPacket_checkQuery(pack);
}

/* allocate a new entry as a cache node */
static Entry*
entry_alloc( const Entry*  init, const void*  answer, int  answerlen )
{
    Entry*  e;
    int     size;

    size = sizeof(*e) + init->querylen + answerlen;
    e    = calloc(size, 1);
    if (e == NULL)
        return e;

    e->hash     = init->hash;
    e->query    = (const uint8_t*)(e+1);
    e->querylen = init->querylen;

    memcpy( (char*)e->query, init->query, e->querylen );

    e->answer    = e->query + e->querylen;
    e->answerlen = answerlen;

    memcpy( (char*)e->answer, answer, e->answerlen );

    return e;
}

static int
entry_equals( const Entry*  e1, const Entry*  e2 )
{
    DnsPacket  pack1[1], pack2[1];

    if (e1->querylen != e2->querylen) {
        return 0;
    }
    _dnsPacket_init(pack1, e1->query, e1->querylen);
    _dnsPacket_init(pack2, e2->query, e2->querylen);

    return _dnsPacket_isEqualQuery(pack1, pack2);
}

/****************************************************************************/
/****************************************************************************/
/*****                                                                  *****/
/*****                                                                  *****/
/*****                                                                  *****/
/****************************************************************************/
/****************************************************************************/

/* We use a simple hash table with external collision lists.
 * for simplicity, the hash-table fields 'hash' and 'hlink' are
 * inlined in the Entry structure.
 */
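
/* Note on the bucket layout: cache->entries (below) is an array of
 * 'max_entries' buckets, and each bucket slot is used only as the head of
 * that bucket's collision chain. _cache_lookup_p() casts
 * &cache->entries[index] to an Entry** and then follows the 'hlink' links,
 * so a chain walk manipulates the same Entry** "slot" type whether the slot
 * is the bucket head or a node's hlink field. A rough picture (illustration
 * only):
 *
 *     entries[0] --> e3 --> e7 --> NULL
 *     entries[1] --> NULL
 *     entries[2] --> e1 --> NULL
 *
 * independently of the buckets, every entry is also linked into the global
 * doubly-linked MRU list rooted at cache->mru_list, which is what the
 * expiration and eviction code walks.
 */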

/* Maximum time for a thread to wait for a pending request */
#define PENDING_REQUEST_TIMEOUT 20

typedef struct pending_req_info {
    unsigned int                hash;
    pthread_cond_t              cond;
    struct pending_req_info*    next;
} PendingReqInfo;

typedef struct resolv_cache {
    int              max_entries;
    int              num_entries;
    Entry            mru_list;
    pthread_mutex_t  lock;
    unsigned         generation;
    int              last_id;
    Entry*           entries;
    PendingReqInfo   pending_requests;
} Cache;

typedef struct resolv_cache_info {
    char                        ifname[IF_NAMESIZE + 1];
    struct in_addr              ifaddr;
    Cache*                      cache;
    struct resolv_cache_info*   next;
    char*                       nameservers[MAXNS +1];
    struct addrinfo*            nsaddrinfo[MAXNS + 1];
} CacheInfo;

#define  HTABLE_VALID(x)  ((x) != NULL && (x) != HTABLE_DELETED)

static void
_cache_flush_pending_requests_locked( struct resolv_cache* cache )
{
    struct pending_req_info *ri, *tmp;
    if (cache) {
        ri = cache->pending_requests.next;

        while (ri) {
            tmp = ri;
            ri = ri->next;
            pthread_cond_broadcast(&tmp->cond);

            pthread_cond_destroy(&tmp->cond);
            free(tmp);
        }

        cache->pending_requests.next = NULL;
    }
}

/* return 0 if no pending request is found matching the key.
 * if a matching request is found, the calling thread will wait until it is
 * released (or until the wait times out), then return 1 */
static int
_cache_check_pending_request_locked( struct resolv_cache* cache, Entry* key )
{
    struct pending_req_info *ri, *prev;
    int exist = 0;

    if (cache && key) {
        ri = cache->pending_requests.next;
        prev = &cache->pending_requests;
        while (ri) {
            if (ri->hash == key->hash) {
                exist = 1;
                break;
            }
            prev = ri;
            ri = ri->next;
        }

        if (!exist) {
            ri = calloc(1, sizeof(struct pending_req_info));
            if (ri) {
                ri->hash = key->hash;
                pthread_cond_init(&ri->cond, NULL);
                prev->next = ri;
            }
        } else {
            struct timespec ts = {0,0};
            ts.tv_sec = _time_now() + PENDING_REQUEST_TIMEOUT;
            pthread_cond_timedwait(&ri->cond, &cache->lock, &ts);
        }
    }

    return exist;
}

/* notify any threads waiting on a pending request matching the key that an
 * answer for it has been added to the cache (or that the request failed) */
static void
_cache_notify_waiting_tid_locked( struct resolv_cache* cache, Entry* key )
{
    struct pending_req_info *ri, *prev;

    if (cache && key) {
        ri = cache->pending_requests.next;
        prev = &cache->pending_requests;
        while (ri) {
            if (ri->hash == key->hash) {
                pthread_cond_broadcast(&ri->cond);
                break;
            }
            prev = ri;
            ri = ri->next;
        }

        // remove item from list and destroy
        if (ri) {
            prev->next = ri->next;
            pthread_cond_destroy(&ri->cond);
            free(ri);
        }
    }
}

/* notify the cache that the query failed */
void
_resolv_cache_query_failed( struct resolv_cache* cache,
                   const void* query,
                   int         querylen)
{
    Entry    key[1];

    if (cache && entry_init_key(key, query, querylen)) {
        pthread_mutex_lock(&cache->lock);
        _cache_notify_waiting_tid_locked(cache, key);
        pthread_mutex_unlock(&cache->lock);
    }
}

static void
_cache_flush_locked( Cache*  cache )
{
    int     nn;

    for (nn = 0; nn < cache->max_entries; nn++)
    {
        Entry**  pnode = (Entry**) &cache->entries[nn];

        while (*pnode != NULL) {
            Entry*  node = *pnode;
            *pnode = node->hlink;
            entry_free(node);
        }
    }

    // flush pending requests
    _cache_flush_pending_requests_locked(cache);

    cache->mru_list.mru_next = cache->mru_list.mru_prev = &cache->mru_list;
    cache->num_entries       = 0;
    cache->last_id           = 0;

    XLOG("*************************\n"
         "*** DNS CACHE FLUSHED ***\n"
         "*************************");
}

/* Return max number of entries allowed in the cache,
 * i.e. cache size. The cache size is either defined
 * by system property ro.net.dns_cache_size or by
 * CONFIG_MAX_ENTRIES if the system property is not set
 * or set to an invalid value. */
static int
_res_cache_get_max_entries( void )
{
    int result = -1;
    char cache_size[PROP_VALUE_MAX];

    const char* cache_mode = getenv("ANDROID_DNS_MODE");

    if (cache_mode == NULL || strcmp(cache_mode, "local") != 0) {
        // Don't use the cache in local mode.  This is used by the
        // proxy itself.
        // TODO - change this to 0 when all dns stuff uses proxy (5918973)
        XLOG("setup cache for non-cache process. size=1");
        return 1;
    }

    if (__system_property_get(DNS_CACHE_SIZE_PROP_NAME, cache_size) > 0) {
        result = atoi(cache_size);
    }

    // ro.net.dns_cache_size not set or set to a non-positive value
    if (result <= 0) {
        result = CONFIG_MAX_ENTRIES;
    }

    XLOG("cache size: %d", result);
    return result;
}

static struct resolv_cache*
_resolv_cache_create( void )
{
    struct resolv_cache*  cache;

    cache = calloc(sizeof(*cache), 1);
    if (cache) {
        cache->max_entries = _res_cache_get_max_entries();
        cache->entries = calloc(sizeof(*cache->entries), cache->max_entries);
        if (cache->entries) {
            cache->generation = ~0U;
            pthread_mutex_init( &cache->lock, NULL );
            cache->mru_list.mru_prev = cache->mru_list.mru_next = &cache->mru_list;
            XLOG("%s: cache created\n", __FUNCTION__);
        } else {
            free(cache);
            cache = NULL;
        }
    }
    return cache;
}


#if DEBUG
static void
_dump_query( const uint8_t*  query, int  querylen )
{
    char       temp[256], *p=temp, *end=p+sizeof(temp);
    DnsPacket  pack[1];

    _dnsPacket_init(pack, query, querylen);
    p = _dnsPacket_bprintQuery(pack, p, end);
    XLOG("QUERY: %s", temp);
}

static void
_cache_dump_mru( Cache*  cache )
{
    char    temp[512], *p=temp, *end=p+sizeof(temp);
    Entry*  e;

    p = _bprint(temp, end, "MRU LIST (%2d): ", cache->num_entries);
    for (e = cache->mru_list.mru_next; e != &cache->mru_list; e = e->mru_next)
        p = _bprint(p, end, " %d", e->id);

    XLOG("%s", temp);
}

static void
_dump_answer(const void* answer, int answerlen)
{
    res_state statep;
    FILE* fp;
    char* buf;
    int fileLen;

    fp = fopen("/data/reslog.txt", "w+");
    if (fp != NULL) {
        statep = __res_get_state();

        res_pquery(statep, answer, answerlen, fp);

        //Get file length
        fseek(fp, 0, SEEK_END);
        fileLen=ftell(fp);
        fseek(fp, 0, SEEK_SET);
        buf = (char *)malloc(fileLen+1);
        if (buf != NULL) {
            //Read file contents into buffer and 0-terminate it
            fread(buf, fileLen, 1, fp);
            buf[fileLen] = '\0';
            XLOG("%s\n", buf);
            free(buf);
        }
        fclose(fp);
        remove("/data/reslog.txt");
    }
    else {
        XLOG("_dump_answer: can't open file\n");
    }
}
#endif

#if DEBUG
#  define  XLOG_QUERY(q,len)   _dump_query((q), (len))
#  define  XLOG_ANSWER(a, len) _dump_answer((a), (len))
#else
#  define  XLOG_QUERY(q,len)   ((void)0)
#  define  XLOG_ANSWER(a,len)  ((void)0)
#endif

/* This function tries to find a key within the hash table
 * In case of success, it will return a *pointer* to the hashed key.
 * In case of failure, it will return a *pointer* to NULL
 *
 * So, the caller must check '*result' to check for success/failure.
 *
 * The main idea is that the result can later be used directly in
 * calls to _cache_add_p() or _cache_remove_p() as the 'lookup'
 * parameter. This makes the code simpler and avoids re-searching
 * for the key position in the htable.
 *
 * The result of a lookup_p is only valid until you alter the hash
 * table.
 */
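/* The typical pattern then looks like this (a sketch only; the real users
 * are _resolv_cache_lookup() and _resolv_cache_add() further down):
 *
 *     Entry**  lookup = _cache_lookup_p(cache, key);
 *     Entry*   e      = *lookup;
 *
 *     if (e == NULL) {                      // miss: insert at this slot
 *         e = entry_alloc(key, answer, answerlen);
 *         if (e != NULL)
 *             _cache_add_p(cache, lookup, e);
 *     } else if (now >= e->expires) {       // hit, but stale: unlink it
 *         _cache_remove_p(cache, lookup);
 *     }
 */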
static Entry**
_cache_lookup_p( Cache*   cache,
                 Entry*   key )
{
    int      index = key->hash % cache->max_entries;
    Entry**  pnode = (Entry**) &cache->entries[ index ];

    while (*pnode != NULL) {
        Entry*  node = *pnode;

        if (node->hash == key->hash && entry_equals(node, key))
            break;

        pnode = &node->hlink;
    }
    return pnode;
}

/* Add a new entry to the hash table. 'lookup' must be the
 * result of an immediate previous failed _lookup_p() call
 * (i.e. with *lookup == NULL), and 'e' is the pointer to the
 * newly created entry
 */
static void
_cache_add_p( Cache*   cache,
              Entry**  lookup,
              Entry*   e )
{
    *lookup = e;
    e->id = ++cache->last_id;
    entry_mru_add(e, &cache->mru_list);
    cache->num_entries += 1;

    XLOG("%s: entry %d added (count=%d)", __FUNCTION__,
         e->id, cache->num_entries);
}

/* Remove an existing entry from the hash table,
 * 'lookup' must be the result of an immediate previous
 * and successful _lookup_p() call.
 */
static void
_cache_remove_p( Cache*   cache,
                 Entry**  lookup )
{
    Entry*  e  = *lookup;

    XLOG("%s: entry %d removed (count=%d)", __FUNCTION__,
         e->id, cache->num_entries-1);

    entry_mru_remove(e);
    *lookup = e->hlink;
    entry_free(e);
    cache->num_entries -= 1;
}

/* Remove the oldest entry from the hash table.
 */
static void
_cache_remove_oldest( Cache*  cache )
{
    Entry*   oldest = cache->mru_list.mru_prev;
    Entry**  lookup = _cache_lookup_p(cache, oldest);

    if (*lookup == NULL) { /* should not happen */
        XLOG("%s: OLDEST NOT IN HTABLE ?", __FUNCTION__);
        return;
    }
    if (DEBUG) {
        XLOG("Cache full - removing oldest");
        XLOG_QUERY(oldest->query, oldest->querylen);
    }
    _cache_remove_p(cache, lookup);
}

/* Remove all expired entries from the hash table.
 */
static void _cache_remove_expired(Cache* cache) {
    Entry* e;
    time_t now = _time_now();

    for (e = cache->mru_list.mru_next; e != &cache->mru_list;) {
        // Entry is old, remove
        if (now >= e->expires) {
            Entry** lookup = _cache_lookup_p(cache, e);
            if (*lookup == NULL) { /* should not happen */
                XLOG("%s: ENTRY NOT IN HTABLE ?", __FUNCTION__);
                return;
            }
            e = e->mru_next;
            _cache_remove_p(cache, lookup);
        } else {
            e = e->mru_next;
        }
    }
}

ResolvCacheStatus
_resolv_cache_lookup( struct resolv_cache*  cache,
                      const void*           query,
                      int                   querylen,
                      void*                 answer,
                      int                   answersize,
                      int                  *answerlen )
{
    Entry      key[1];
    Entry**    lookup;
    Entry*     e;
    time_t     now;

    ResolvCacheStatus  result = RESOLV_CACHE_NOTFOUND;

    XLOG("%s: lookup", __FUNCTION__);
    XLOG_QUERY(query, querylen);

    /* we don't cache malformed queries */
    if (!entry_init_key(key, query, querylen)) {
        XLOG("%s: unsupported query", __FUNCTION__);
        return RESOLV_CACHE_UNSUPPORTED;
    }
    /* lookup cache */
    pthread_mutex_lock( &cache->lock );

    /* see the description of _lookup_p to understand this.
     * the function always returns a non-NULL pointer.
     */
    lookup = _cache_lookup_p(cache, key);
    e      = *lookup;

    if (e == NULL) {
        XLOG( "NOT IN CACHE");
        // the calling thread will wait if an outstanding request
        // that matches this query is found
        if (!_cache_check_pending_request_locked(cache, key)) {
            goto Exit;
        } else {
            lookup = _cache_lookup_p(cache, key);
            e = *lookup;
            if (e == NULL) {
                goto Exit;
            }
        }
    }

    now = _time_now();

    /* remove stale entries here */
    if (now >= e->expires) {
        XLOG( " NOT IN CACHE (STALE ENTRY %p DISCARDED)", *lookup );
        XLOG_QUERY(e->query, e->querylen);
        _cache_remove_p(cache, lookup);
        goto Exit;
    }

    *answerlen = e->answerlen;
    if (e->answerlen > answersize) {
        /* NOTE: we return UNSUPPORTED if the answer buffer is too short */
        result = RESOLV_CACHE_UNSUPPORTED;
        XLOG(" ANSWER TOO LONG");
        goto Exit;
    }

    memcpy( answer, e->answer, e->answerlen );

    /* bump up this entry to the top of the MRU list */
    if (e != cache->mru_list.mru_next) {
        entry_mru_remove( e );
        entry_mru_add( e, &cache->mru_list );
    }

    XLOG( "FOUND IN CACHE entry=%p", e );
    result = RESOLV_CACHE_FOUND;

Exit:
    pthread_mutex_unlock( &cache->lock );
    return result;
}


void
_resolv_cache_add( struct resolv_cache*  cache,
                   const void*           query,
                   int                   querylen,
                   const void*           answer,
                   int                   answerlen )
{
    Entry    key[1];
    Entry*   e;
    Entry**  lookup;
    u_long   ttl;

    /* don't assume that the query has already been cached
     */
    if (!entry_init_key( key, query, querylen )) {
        XLOG( "%s: passed invalid query ?", __FUNCTION__);
        return;
    }

    pthread_mutex_lock( &cache->lock );

    XLOG( "%s: query:", __FUNCTION__ );
    XLOG_QUERY(query,querylen);
    XLOG_ANSWER(answer, answerlen);
#if DEBUG_DATA
    XLOG( "answer:");
    XLOG_BYTES(answer,answerlen);
#endif

    lookup = _cache_lookup_p(cache, key);
    e      = *lookup;

    if (e != NULL) { /* should not happen */
        XLOG("%s: ALREADY IN CACHE (%p) ? IGNORING ADD",
             __FUNCTION__, e);
        goto Exit;
    }

    if (cache->num_entries >= cache->max_entries) {
        _cache_remove_expired(cache);
        if (cache->num_entries >= cache->max_entries) {
            _cache_remove_oldest(cache);
        }
        /* need to lookup again */
        lookup = _cache_lookup_p(cache, key);
        e      = *lookup;
        if (e != NULL) {
            XLOG("%s: ALREADY IN CACHE (%p) ? IGNORING ADD",
                __FUNCTION__, e);
            goto Exit;
        }
    }

    ttl = answer_getTTL(answer, answerlen);
    if (ttl > 0) {
        e = entry_alloc(key, answer, answerlen);
        if (e != NULL) {
            e->expires = ttl + _time_now();
            _cache_add_p(cache, lookup, e);
        }
    }
#if DEBUG
    _cache_dump_mru(cache);
#endif
Exit:
    _cache_notify_waiting_tid_locked(cache, key);
    pthread_mutex_unlock( &cache->lock );
}

/****************************************************************************/
/****************************************************************************/
/*****                                                                  *****/
/*****                                                                  *****/
/*****                                                                  *****/
/****************************************************************************/
/****************************************************************************/

static pthread_once_t        _res_cache_once = PTHREAD_ONCE_INIT;

// Head of the list of caches.  Protected by _res_cache_list_lock.
static struct resolv_cache_info _res_cache_list;

// name of the current default interface
static char            _res_default_ifname[IF_NAMESIZE + 1];

// lock protecting everything in the _resolv_cache_info structs (next ptr, etc)
static pthread_mutex_t _res_cache_list_lock;


/* lookup the default interface name */
static char *_get_default_iface_locked();
/* insert resolv_cache_info into the list of resolv_cache_infos */
static void _insert_cache_info_locked(struct resolv_cache_info* cache_info);
/* creates a resolv_cache_info */
static struct resolv_cache_info* _create_cache_info( void );
/* gets cache associated with an interface name, or NULL if none exists */
static struct resolv_cache* _find_named_cache_locked(const char* ifname);
/* gets a resolv_cache_info associated with an interface name, or NULL if not found */
static struct resolv_cache_info* _find_cache_info_locked(const char* ifname);
/* free dns name server list of a resolv_cache_info structure */
static void _free_nameservers(struct resolv_cache_info* cache_info);
/* look up the named cache, and creates one if needed */
static struct resolv_cache* _get_res_cache_for_iface_locked(const char* ifname);
/* empty the named cache */
static void _flush_cache_for_iface_locked(const char* ifname);
/* empty the nameservers set for the named cache */
static void _free_nameservers_locked(struct resolv_cache_info* cache_info);
/* lookup the nameserver for the named interface */
static int _get_nameserver_locked(const char* ifname, int n, char* addr, int addrLen);
/* lookup the addr of the nameserver for the named interface */
static struct addrinfo* _get_nameserver_addr_locked(const char* ifname, int n);
/* lookup the interface's address */
static struct in_addr* _get_addr_locked(const char * ifname);



static void
_res_cache_init(void)
{
    const char*  env = getenv(CONFIG_ENV);

    if (env && atoi(env) == 0) {
        /* the cache is disabled */
        return;
    }

    memset(&_res_default_ifname, 0, sizeof(_res_default_ifname));
    memset(&_res_cache_list, 0, sizeof(_res_cache_list));
    pthread_mutex_init(&_res_cache_list_lock, NULL);
}

struct resolv_cache*
__get_res_cache(void)
{
    struct resolv_cache *cache;

    pthread_once(&_res_cache_once, _res_cache_init);

    pthread_mutex_lock(&_res_cache_list_lock);

    char* ifname = _get_default_iface_locked();

    // If the default interface is not set, use the first cache
    // associated with an interface as the default one.
    if (ifname[0] == '\0') {
        struct resolv_cache_info* cache_info = _res_cache_list.next;
        while (cache_info) {
            if (cache_info->ifname[0] != '\0') {
                ifname = cache_info->ifname;
                break;
            }

            cache_info = cache_info->next;
        }
    }
    cache = _get_res_cache_for_iface_locked(ifname);

    pthread_mutex_unlock(&_res_cache_list_lock);
    XLOG("__get_res_cache: default_ifname = %s\n", ifname);
    return cache;
}

static struct resolv_cache*
_get_res_cache_for_iface_locked(const char* ifname)
{
    if (ifname == NULL)
        return NULL;

    struct resolv_cache* cache = _find_named_cache_locked(ifname);
    if (!cache) {
        struct resolv_cache_info* cache_info = _create_cache_info();
        if (cache_info) {
            cache = _resolv_cache_create();
            if (cache) {
                int len = sizeof(cache_info->ifname);
                cache_info->cache = cache;
                strncpy(cache_info->ifname, ifname, len - 1);
                cache_info->ifname[len - 1] = '\0';

                _insert_cache_info_locked(cache_info);
            } else {
                free(cache_info);
            }
        }
    }
    return cache;
}
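
/* Illustrative sketch (caller must hold _res_cache_list_lock): repeated calls
 * for the same interface name return the same cache, creating it only once.
 * The interface name below is purely an example:
 *
 *     struct resolv_cache* a = _get_res_cache_for_iface_locked("wlan0");
 *     struct resolv_cache* b = _get_res_cache_for_iface_locked("wlan0");
 *     // a == b, assuming the first allocation succeeded
 */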

void
_resolv_cache_reset(unsigned  generation)
{
    XLOG("%s: generation=%u", __FUNCTION__, generation);

    pthread_once(&_res_cache_once, _res_cache_init);
    pthread_mutex_lock(&_res_cache_list_lock);

    char* ifname = _get_default_iface_locked();
    // If the default interface is not set, use the first cache
    // associated with an interface as the default one.
    // Note: this duplicates the code in __get_res_cache, since this
    // method will become obsolete once per-interface caching is
    // implemented everywhere.
    if (ifname[0] == '\0') {
        struct resolv_cache_info* cache_info = _res_cache_list.next;
        while (cache_info) {
            if (cache_info->ifname[0] != '\0') {
                ifname = cache_info->ifname;
                break;
            }

            cache_info = cache_info->next;
        }
    }
    struct resolv_cache* cache = _get_res_cache_for_iface_locked(ifname);

    if (cache != NULL) {
        pthread_mutex_lock( &cache->lock );
        if (cache->generation != generation) {
            _cache_flush_locked(cache);
            cache->generation = generation;
        }
        pthread_mutex_unlock( &cache->lock );
    }

    pthread_mutex_unlock(&_res_cache_list_lock);
}

void
_resolv_flush_cache_for_default_iface(void)
{
    char* ifname;

    pthread_once(&_res_cache_once, _res_cache_init);
    pthread_mutex_lock(&_res_cache_list_lock);

    ifname = _get_default_iface_locked();
    _flush_cache_for_iface_locked(ifname);

    pthread_mutex_unlock(&_res_cache_list_lock);
}

void
_resolv_flush_cache_for_iface(const char* ifname)
{
    pthread_once(&_res_cache_once, _res_cache_init);
    pthread_mutex_lock(&_res_cache_list_lock);

    _flush_cache_for_iface_locked(ifname);

    pthread_mutex_unlock(&_res_cache_list_lock);
}

static void
_flush_cache_for_iface_locked(const char* ifname)
{
    struct resolv_cache* cache = _find_named_cache_locked(ifname);
    if (cache) {
        pthread_mutex_lock(&cache->lock);
        _cache_flush_locked(cache);
        pthread_mutex_unlock(&cache->lock);
    }
}

static struct resolv_cache_info*
_create_cache_info(void)
{
    struct resolv_cache_info*  cache_info;

    cache_info = calloc(1, sizeof(*cache_info));
    return cache_info;
}

static void
_insert_cache_info_locked(struct resolv_cache_info* cache_info)
{
    struct resolv_cache_info* last;

    /* walk to the tail of the list and append the new entry */
    for (last = &_res_cache_list; last->next; last = last->next);

    last->next = cache_info;
}

static struct resolv_cache*
_find_named_cache_locked(const char* ifname)
{
    struct resolv_cache_info* info = _find_cache_info_locked(ifname);

    if (info != NULL)
        return info->cache;

    return NULL;
}

static struct resolv_cache_info*
_find_cache_info_locked(const char* ifname)
{
    if (ifname == NULL)
        return NULL;

    struct resolv_cache_info* cache_info = _res_cache_list.next;

    while (cache_info) {
        if (strcmp(cache_info->ifname, ifname) == 0) {
            break;
        }

        cache_info = cache_info->next;
    }
    return cache_info;
}

static char*
_get_default_iface_locked(void)
{
    return _res_default_ifname;
}

void
_resolv_set_default_iface(const char* ifname)
{
    XLOG("_resolv_set_default_iface: ifname %s\n", ifname);

    pthread_once(&_res_cache_once, _res_cache_init);
    pthread_mutex_lock(&_res_cache_list_lock);

    int size = sizeof(_res_default_ifname);
    memset(_res_default_ifname, 0, size);
    strncpy(_res_default_ifname, ifname, size - 1);
    _res_default_ifname[size - 1] = '\0';

    pthread_mutex_unlock(&_res_cache_list_lock);
}

void
_resolv_set_nameservers_for_iface(const char* ifname, char** servers, int numservers)
{
    int i, rt, index;
    struct addrinfo hints;
    char sbuf[NI_MAXSERV];

    pthread_once(&_res_cache_once, _res_cache_init);

    pthread_mutex_lock(&_res_cache_list_lock);
    // creates the cache if it has not been created yet
    _get_res_cache_for_iface_locked(ifname);

    struct resolv_cache_info* cache_info = _find_cache_info_locked(ifname);

    if (cache_info != NULL) {
        // free the current servers before adding the new ones
        _free_nameservers_locked(cache_info);

        memset(&hints, 0, sizeof(hints));
        hints.ai_family = PF_UNSPEC;
        hints.ai_socktype = SOCK_DGRAM; /* dummy */
        hints.ai_flags = AI_NUMERICHOST;
        snprintf(sbuf, sizeof(sbuf), "%u", NAMESERVER_PORT);

        index = 0;
        for (i = 0; i < numservers && i < MAXNS; i++) {
            /* servers must be numeric addresses (AI_NUMERICHOST); entries
             * that fail to parse are skipped and do not advance 'index' */
            rt = getaddrinfo(servers[i], sbuf, &hints, &cache_info->nsaddrinfo[index]);
            if (rt == 0) {
                cache_info->nameservers[index] = strdup(servers[i]);
                index++;
            } else {
                cache_info->nsaddrinfo[index] = NULL;
            }
        }
    }
    pthread_mutex_unlock(&_res_cache_list_lock);
}
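
/* Example usage (a hedged sketch; the interface name and addresses are
 * purely illustrative): the code that configures an interface would push
 * its DNS servers like this.  Only numeric address strings are accepted,
 * because AI_NUMERICHOST is set above:
 *
 *     char* servers[2] = { "8.8.8.8", "2001:4860:4860::8888" };
 *     _resolv_set_nameservers_for_iface("wlan0", servers, 2);
 *
 * Entries beyond MAXNS, and strings getaddrinfo() rejects, are ignored.
 */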

static void
_free_nameservers_locked(struct resolv_cache_info* cache_info)
{
    int i;
    for (i = 0; i <= MAXNS; i++) {
        free(cache_info->nameservers[i]);
        cache_info->nameservers[i] = NULL;
        if (cache_info->nsaddrinfo[i] != NULL) {
            freeaddrinfo(cache_info->nsaddrinfo[i]);
            cache_info->nsaddrinfo[i] = NULL;
        }
    }
}

int
_resolv_cache_get_nameserver(int n, char* addr, int addrLen)
{
    char *ifname;
    int result = 0;

    pthread_once(&_res_cache_once, _res_cache_init);
    pthread_mutex_lock(&_res_cache_list_lock);

    ifname = _get_default_iface_locked();
    result = _get_nameserver_locked(ifname, n, addr, addrLen);

    pthread_mutex_unlock(&_res_cache_list_lock);
    return result;
}

static int
_get_nameserver_locked(const char* ifname, int n, char* addr, int addrLen)
{
    int len = 0;
    char* ns;
    struct resolv_cache_info* cache_info;

    if (n < 1 || n > MAXNS || !addr)
        return 0;

    cache_info = _find_cache_info_locked(ifname);
    if (cache_info) {
        ns = cache_info->nameservers[n - 1];
        if (ns) {
            len = strlen(ns);
            if (len < addrLen) {
                strncpy(addr, ns, len);
                addr[len] = '\0';
            } else {
                len = 0;
            }
        }
    }

    return len;
}
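
/* Illustrative sketch of the contract above: the return value is the number
 * of characters copied into 'addr', or 0 if 'n' is out of range, no such
 * server is configured, or the buffer is too small.  A hypothetical caller:
 *
 *     char ns[INET6_ADDRSTRLEN];
 *     if (_resolv_cache_get_nameserver(1, ns, sizeof(ns)) > 0) {
 *         // ns now holds the first nameserver of the default interface
 *     }
 */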

struct addrinfo*
_cache_get_nameserver_addr(int n)
{
    struct addrinfo *result;
    char* ifname;

    pthread_once(&_res_cache_once, _res_cache_init);
    pthread_mutex_lock(&_res_cache_list_lock);

    ifname = _get_default_iface_locked();

    result = _get_nameserver_addr_locked(ifname, n);
    pthread_mutex_unlock(&_res_cache_list_lock);
    return result;
}
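
/* Ownership note: the addrinfo returned here (and by
 * _get_nameserver_addr_locked below) is still owned by the matching
 * resolv_cache_info entry.  Callers must not freeaddrinfo() it;
 * _free_nameservers_locked() releases it when the servers for the
 * interface are replaced. */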

static struct addrinfo*
_get_nameserver_addr_locked(const char* ifname, int n)
{
    struct addrinfo* ai = NULL;
    struct resolv_cache_info* cache_info;

    if (n < 1 || n > MAXNS)
        return NULL;

    cache_info = _find_cache_info_locked(ifname);
    if (cache_info) {
        ai = cache_info->nsaddrinfo[n - 1];
    }
    return ai;
}

void
_resolv_set_addr_of_iface(const char* ifname, struct in_addr* addr)
{
    pthread_once(&_res_cache_once, _res_cache_init);
    pthread_mutex_lock(&_res_cache_list_lock);
    struct resolv_cache_info* cache_info = _find_cache_info_locked(ifname);
    if (cache_info) {
        memcpy(&cache_info->ifaddr, addr, sizeof(*addr));

        if (DEBUG) {
            char* addr_s = inet_ntoa(cache_info->ifaddr);
            XLOG("address of interface %s is %s\n", ifname, addr_s);
        }
    }
    pthread_mutex_unlock(&_res_cache_list_lock);
}
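
/* Example (hedged sketch; interface name and address are illustrative):
 * a caller recording the IPv4 address of an interface it just configured:
 *
 *     struct in_addr a;
 *     if (inet_aton("192.168.1.100", &a))
 *         _resolv_set_addr_of_iface("wlan0", &a);
 *
 * The address is only stored if a cache_info entry already exists for the
 * interface (e.g. created by _resolv_set_nameservers_for_iface), since
 * _find_cache_info_locked() must find it.
 */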

struct in_addr*
_resolv_get_addr_of_default_iface(void)
{
    struct in_addr* ai = NULL;
    char* ifname;

    pthread_once(&_res_cache_once, _res_cache_init);
    pthread_mutex_lock(&_res_cache_list_lock);
    ifname = _get_default_iface_locked();
    ai = _get_addr_locked(ifname);
    pthread_mutex_unlock(&_res_cache_list_lock);

    return ai;
}

struct in_addr*
_resolv_get_addr_of_iface(const char* ifname)
{
    struct in_addr* ai = NULL;

    pthread_once(&_res_cache_once, _res_cache_init);
    pthread_mutex_lock(&_res_cache_list_lock);
    ai = _get_addr_locked(ifname);
    pthread_mutex_unlock(&_res_cache_list_lock);
    return ai;
}

static struct in_addr*
_get_addr_locked(const char * ifname)
{
    struct resolv_cache_info* cache_info = _find_cache_info_locked(ifname);
    if (cache_info) {
        return &cache_info->ifaddr;
    }
    return NULL;
}
