1/* dnsmasq is Copyright (c) 2000-2009 Simon Kelley
2
3   This program is free software; you can redistribute it and/or modify
4   it under the terms of the GNU General Public License as published by
5   the Free Software Foundation; version 2 dated June, 1991, or
6   (at your option) version 3 dated 29 June, 2007.
7
8   This program is distributed in the hope that it will be useful,
9   but WITHOUT ANY WARRANTY; without even the implied warranty of
10   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11   GNU General Public License for more details.
12
13   You should have received a copy of the GNU General Public License
14   along with this program.  If not, see <http://www.gnu.org/licenses/>.
15*/
16
17#include "dnsmasq.h"
18
19static struct frec *lookup_frec(unsigned short id, unsigned int crc);
20static struct frec *lookup_frec_by_sender(unsigned short id,
21					  union mysockaddr *addr,
22					  unsigned int crc);
23static unsigned short get_id(int force, unsigned short force_id, unsigned int crc);
24static void free_frec(struct frec *f);
25static struct randfd *allocate_rfd(int family);
26
27/* Send a UDP packet with its source address set as "source"
28   unless nowild is true, when we just send it with the kernel default */
29static void send_from(int fd, int nowild, char *packet, size_t len,
30		      union mysockaddr *to, struct all_addr *source,
31		      unsigned int iface)
32{
33  struct msghdr msg;
34  struct iovec iov[1];
35  union {
36    struct cmsghdr align; /* this ensures alignment */
37#if defined(HAVE_LINUX_NETWORK)
38    char control[CMSG_SPACE(sizeof(struct in_pktinfo))];
39#elif defined(IP_SENDSRCADDR)
40    char control[CMSG_SPACE(sizeof(struct in_addr))];
41#endif
42#ifdef HAVE_IPV6
43    char control6[CMSG_SPACE(sizeof(struct in6_pktinfo))];
44#endif
45  } control_u;
46
47  iov[0].iov_base = packet;
48  iov[0].iov_len = len;
49
50  msg.msg_control = NULL;
51  msg.msg_controllen = 0;
52  msg.msg_flags = 0;
53  msg.msg_name = to;
54  msg.msg_namelen = sa_len(to);
55  msg.msg_iov = iov;
56  msg.msg_iovlen = 1;
57
58  if (!nowild)
59    {
60      struct cmsghdr *cmptr;
61      msg.msg_control = &control_u;
62      msg.msg_controllen = sizeof(control_u);
63      cmptr = CMSG_FIRSTHDR(&msg);
64
65      if (to->sa.sa_family == AF_INET)
66	{
67#if defined(HAVE_LINUX_NETWORK)
68	  struct in_pktinfo *pkt = (struct in_pktinfo *)CMSG_DATA(cmptr);
69	  pkt->ipi_ifindex = 0;
70	  pkt->ipi_spec_dst = source->addr.addr4;
71	  msg.msg_controllen = cmptr->cmsg_len = CMSG_LEN(sizeof(struct in_pktinfo));
72	  cmptr->cmsg_level = SOL_IP;
73	  cmptr->cmsg_type = IP_PKTINFO;
74#elif defined(IP_SENDSRCADDR)
75	  struct in_addr *a = (struct in_addr *)CMSG_DATA(cmptr);
76	  *a = source->addr.addr4;
77	  msg.msg_controllen = cmptr->cmsg_len = CMSG_LEN(sizeof(struct in_addr));
78	  cmptr->cmsg_level = IPPROTO_IP;
79	  cmptr->cmsg_type = IP_SENDSRCADDR;
80#endif
81	}
82      else
83#ifdef HAVE_IPV6
84	{
85	  struct in6_pktinfo *pkt = (struct in6_pktinfo *)CMSG_DATA(cmptr);
86	  pkt->ipi6_ifindex = iface; /* Need iface for IPv6 to handle link-local addrs */
87	  pkt->ipi6_addr = source->addr.addr6;
88	  msg.msg_controllen = cmptr->cmsg_len = CMSG_LEN(sizeof(struct in6_pktinfo));
89	  cmptr->cmsg_type = IPV6_PKTINFO;
90	  cmptr->cmsg_level = IPV6_LEVEL;
91	}
92#else
93      iface = 0; /* eliminate warning */
94#endif
95    }
96
97 retry:
98  if (sendmsg(fd, &msg, 0) == -1)
99    {
100      /* certain Linux kernels seem to object to setting the source address in the IPv6 stack
101	 by returning EINVAL from sendmsg. In that case, try again without setting the
102	 source address, since it will nearly alway be correct anyway.  IPv6 stinks. */
103      if (errno == EINVAL && msg.msg_controllen)
104	{
105	  msg.msg_controllen = 0;
106	  goto retry;
107	}
108      if (retry_send())
109	goto retry;
110    }
111}
112
113static unsigned short search_servers(time_t now, struct all_addr **addrpp,
114				     unsigned short qtype, char *qdomain, int *type, char **domain)
115
116{
117  /* If the query ends in the domain in one of our servers, set
118     domain to point to that name. We find the largest match to allow both
119     domain.org and sub.domain.org to exist. */
120
121  unsigned int namelen = strlen(qdomain);
122  unsigned int matchlen = 0;
123  struct server *serv;
124  unsigned short flags = 0;
125
126  for (serv = daemon->servers; serv; serv=serv->next)
127    /* domain matches take priority over NODOTS matches */
128    if ((serv->flags & SERV_FOR_NODOTS) && *type != SERV_HAS_DOMAIN && !strchr(qdomain, '.') && namelen != 0)
129      {
130	unsigned short sflag = serv->addr.sa.sa_family == AF_INET ? F_IPV4 : F_IPV6;
131	*type = SERV_FOR_NODOTS;
132	if (serv->flags & SERV_NO_ADDR)
133	  flags = F_NXDOMAIN;
134	else if (serv->flags & SERV_LITERAL_ADDRESS)
135	  {
136	    if (sflag & qtype)
137	      {
138		flags = sflag;
139		if (serv->addr.sa.sa_family == AF_INET)
140		  *addrpp = (struct all_addr *)&serv->addr.in.sin_addr;
141#ifdef HAVE_IPV6
142		else
143		  *addrpp = (struct all_addr *)&serv->addr.in6.sin6_addr;
144#endif
145	      }
146	    else if (!flags || (flags & F_NXDOMAIN))
147	      flags = F_NOERR;
148	  }
149      }
150    else if (serv->flags & SERV_HAS_DOMAIN)
151      {
152	unsigned int domainlen = strlen(serv->domain);
153	char *matchstart = qdomain + namelen - domainlen;
154	if (namelen >= domainlen &&
155	    hostname_isequal(matchstart, serv->domain) &&
156	    domainlen >= matchlen &&
157	    (domainlen == 0 || namelen == domainlen || *(serv->domain) == '.' || *(matchstart-1) == '.' ))
158	  {
159	    unsigned short sflag = serv->addr.sa.sa_family == AF_INET ? F_IPV4 : F_IPV6;
160	    *type = SERV_HAS_DOMAIN;
161	    *domain = serv->domain;
162	    matchlen = domainlen;
163	    if (serv->flags & SERV_NO_ADDR)
164	      flags = F_NXDOMAIN;
165	    else if (serv->flags & SERV_LITERAL_ADDRESS)
166	      {
167		if (sflag & qtype)
168		  {
169		    flags = sflag;
170		    if (serv->addr.sa.sa_family == AF_INET)
171		      *addrpp = (struct all_addr *)&serv->addr.in.sin_addr;
172#ifdef HAVE_IPV6
173		    else
174		      *addrpp = (struct all_addr *)&serv->addr.in6.sin6_addr;
175#endif
176		  }
177		else if (!flags || (flags & F_NXDOMAIN))
178		  flags = F_NOERR;
179	      }
180	  }
181      }
182
183  if (flags == 0 && !(qtype & F_BIGNAME) &&
184      (daemon->options & OPT_NODOTS_LOCAL) && !strchr(qdomain, '.') && namelen != 0)
185    /* don't forward simple names, make exception for NS queries and empty name. */
186    flags = F_NXDOMAIN;
187
188  if (flags == F_NXDOMAIN && check_for_local_domain(qdomain, now))
189    flags = F_NOERR;
190
191  if (flags)
192    {
193      int logflags = 0;
194
195      if (flags == F_NXDOMAIN || flags == F_NOERR)
196	logflags = F_NEG | qtype;
197
198      log_query(logflags | flags | F_CONFIG | F_FORWARD, qdomain, *addrpp, NULL);
199    }
200
201  return  flags;
202}
203
204static int forward_query(int udpfd, union mysockaddr *udpaddr,
205			 struct all_addr *dst_addr, unsigned int dst_iface,
206			 HEADER *header, size_t plen, time_t now, struct frec *forward)
207{
208  char *domain = NULL;
209  int type = 0;
210  struct all_addr *addrp = NULL;
211  unsigned int crc = questions_crc(header, plen, daemon->namebuff);
212  unsigned short flags = 0;
213  unsigned short gotname = extract_request(header, plen, daemon->namebuff, NULL);
214  struct server *start = NULL;
215
216  /* may be no servers available. */
217  if (!daemon->servers)
218    forward = NULL;
219  else if (forward || (forward = lookup_frec_by_sender(ntohs(header->id), udpaddr, crc)))
220    {
221      /* retry on existing query, send to all available servers  */
222      domain = forward->sentto->domain;
223      forward->sentto->failed_queries++;
224      if (!(daemon->options & OPT_ORDER))
225	{
226	  forward->forwardall = 1;
227	  daemon->last_server = NULL;
228	}
229      type = forward->sentto->flags & SERV_TYPE;
230      if (!(start = forward->sentto->next))
231	start = daemon->servers; /* at end of list, recycle */
232      header->id = htons(forward->new_id);
233    }
234  else
235    {
236      if (gotname)
237	flags = search_servers(now, &addrp, gotname, daemon->namebuff, &type, &domain);
238
239      if (!flags && !(forward = get_new_frec(now, NULL)))
240	/* table full - server failure. */
241	flags = F_NEG;
242
243      if (forward)
244	{
245	  /* force unchanging id for signed packets */
246	  int is_sign;
247	  find_pseudoheader(header, plen, NULL, NULL, &is_sign);
248
249	  forward->source = *udpaddr;
250	  forward->dest = *dst_addr;
251	  forward->iface = dst_iface;
252	  forward->orig_id = ntohs(header->id);
253	  forward->new_id = get_id(is_sign, forward->orig_id, crc);
254	  forward->fd = udpfd;
255	  forward->crc = crc;
256	  forward->forwardall = 0;
257	  header->id = htons(forward->new_id);
258
259	  /* In strict_order mode, or when using domain specific servers
260	     always try servers in the order specified in resolv.conf,
261	     otherwise, use the one last known to work. */
262
263	  if (type != 0  || (daemon->options & OPT_ORDER))
264	    start = daemon->servers;
265	  else if (!(start = daemon->last_server) ||
266		   daemon->forwardcount++ > FORWARD_TEST ||
267		   difftime(now, daemon->forwardtime) > FORWARD_TIME)
268	    {
269	      start = daemon->servers;
270	      forward->forwardall = 1;
271	      daemon->forwardcount = 0;
272	      daemon->forwardtime = now;
273	    }
274	}
275    }
276
277  /* check for send errors here (no route to host)
278     if we fail to send to all nameservers, send back an error
279     packet straight away (helps modem users when offline)  */
280
281  if (!flags && forward)
282    {
283      struct server *firstsentto = start;
284      int forwarded = 0;
285
286      while (1)
287	{
288	  /* only send to servers dealing with our domain.
289	     domain may be NULL, in which case server->domain
290	     must be NULL also. */
291
292	  if (type == (start->flags & SERV_TYPE) &&
293	      (type != SERV_HAS_DOMAIN || hostname_isequal(domain, start->domain)) &&
294	      !(start->flags & SERV_LITERAL_ADDRESS))
295	    {
296	      int fd;
297
298	      /* find server socket to use, may need to get random one. */
299	      if (start->sfd)
300		fd = start->sfd->fd;
301	      else
302		{
303#ifdef HAVE_IPV6
304		  if (start->addr.sa.sa_family == AF_INET6)
305		    {
306		      if (!forward->rfd6 &&
307			  !(forward->rfd6 = allocate_rfd(AF_INET6)))
308			break;
309		      daemon->rfd_save = forward->rfd6;
310		      fd = forward->rfd6->fd;
311		    }
312		  else
313#endif
314		    {
315		      if (!forward->rfd4 &&
316			  !(forward->rfd4 = allocate_rfd(AF_INET)))
317			break;
318		      daemon->rfd_save = forward->rfd4;
319		      fd = forward->rfd4->fd;
320		    }
321
322#ifdef ANDROID
323		  // Mark the socket so it goes out on the correct network. Note
324		  // that we never clear the mark, only re-set it the next time we
325		  // allocate a new random fd. This is because we buffer DNS
326		  // queries (in daemon->srv_save, daemon->packet_len) and socket
327		  // file descriptors (in daemon->rfd_save) with the expectation of
328		  // being able to use them again.
329		  //
330		  // Server fds are marked separately in allocate_sfd.
331		  setsockopt(fd, SOL_SOCKET, SO_MARK, &start->mark, sizeof(start->mark));
332#endif
333		}
334
335	      if (sendto(fd, (char *)header, plen, 0,
336			 &start->addr.sa,
337			 sa_len(&start->addr)) == -1)
338		{
339		  if (retry_send())
340		    continue;
341		}
342	      else
343		{
344		  /* Keep info in case we want to re-send this packet */
345		  daemon->srv_save = start;
346		  daemon->packet_len = plen;
347
348		  if (!gotname)
349		    strcpy(daemon->namebuff, "query");
350		  if (start->addr.sa.sa_family == AF_INET)
351		    log_query(F_SERVER | F_IPV4 | F_FORWARD, daemon->namebuff,
352			      (struct all_addr *)&start->addr.in.sin_addr, NULL);
353#ifdef HAVE_IPV6
354		  else
355		    log_query(F_SERVER | F_IPV6 | F_FORWARD, daemon->namebuff,
356			      (struct all_addr *)&start->addr.in6.sin6_addr, NULL);
357#endif
358		  start->queries++;
359		  forwarded = 1;
360		  forward->sentto = start;
361		  if (!forward->forwardall)
362		    break;
363		  forward->forwardall++;
364		}
365	    }
366
367	  if (!(start = start->next))
368 	    start = daemon->servers;
369
370	  if (start == firstsentto)
371	    break;
372	}
373
374      if (forwarded)
375	return 1;
376
377      /* could not send on, prepare to return */
378      header->id = htons(forward->orig_id);
379      free_frec(forward); /* cancel */
380    }
381
382  /* could not send on, return empty answer or address if known for whole domain */
383  if (udpfd != -1)
384    {
385      plen = setup_reply(header, plen, addrp, flags, daemon->local_ttl);
386      send_from(udpfd, daemon->options & OPT_NOWILD, (char *)header, plen, udpaddr, dst_addr, dst_iface);
387    }
388
389  return 0;
390}
391
392static size_t process_reply(HEADER *header, time_t now,
393			    struct server *server, size_t n)
394{
395  unsigned char *pheader, *sizep;
396  int munged = 0, is_sign;
397  size_t plen;
398
399  /* If upstream is advertising a larger UDP packet size
400     than we allow, trim it so that we don't get overlarge
401     requests for the client. We can't do this for signed packets. */
402
403  if ((pheader = find_pseudoheader(header, n, &plen, &sizep, &is_sign)) && !is_sign)
404    {
405      unsigned short udpsz;
406      unsigned char *psave = sizep;
407
408      GETSHORT(udpsz, sizep);
409      if (udpsz > daemon->edns_pktsz)
410	PUTSHORT(daemon->edns_pktsz, psave);
411    }
412
413  if (header->opcode != QUERY || (header->rcode != NOERROR && header->rcode != NXDOMAIN))
414    return n;
415
416  /* Complain loudly if the upstream server is non-recursive. */
417  if (!header->ra && header->rcode == NOERROR && ntohs(header->ancount) == 0 &&
418      server && !(server->flags & SERV_WARNED_RECURSIVE))
419    {
420      prettyprint_addr(&server->addr, daemon->namebuff);
421      my_syslog(LOG_WARNING, _("nameserver %s refused to do a recursive query"), daemon->namebuff);
422      if (!(daemon->options & OPT_LOG))
423	server->flags |= SERV_WARNED_RECURSIVE;
424    }
425
426  if (daemon->bogus_addr && header->rcode != NXDOMAIN &&
427      check_for_bogus_wildcard(header, n, daemon->namebuff, daemon->bogus_addr, now))
428    {
429      munged = 1;
430      header->rcode = NXDOMAIN;
431      header->aa = 0;
432    }
433  else
434    {
435      if (header->rcode == NXDOMAIN &&
436	  extract_request(header, n, daemon->namebuff, NULL) &&
437	  check_for_local_domain(daemon->namebuff, now))
438	{
439	  /* if we forwarded a query for a locally known name (because it was for
440	     an unknown type) and the answer is NXDOMAIN, convert that to NODATA,
441	     since we know that the domain exists, even if upstream doesn't */
442	  munged = 1;
443	  header->aa = 1;
444	  header->rcode = NOERROR;
445	}
446
447      if (extract_addresses(header, n, daemon->namebuff, now))
448	{
449	  my_syslog(LOG_WARNING, _("possible DNS-rebind attack detected"));
450	  munged = 1;
451	}
452    }
453
454  /* do this after extract_addresses. Ensure NODATA reply and remove
455     nameserver info. */
456
457  if (munged)
458    {
459      header->ancount = htons(0);
460      header->nscount = htons(0);
461      header->arcount = htons(0);
462    }
463
464  /* the bogus-nxdomain stuff, doctor and NXDOMAIN->NODATA munging can all elide
465     sections of the packet. Find the new length here and put back pseudoheader
466     if it was removed. */
467  return resize_packet(header, n, pheader, plen);
468}
469
470/* sets new last_server */
471void reply_query(int fd, int family, time_t now)
472{
473  /* packet from peer server, extract data for cache, and send to
474     original requester */
475  HEADER *header;
476  union mysockaddr serveraddr;
477  struct frec *forward;
478  socklen_t addrlen = sizeof(serveraddr);
479  ssize_t n = recvfrom(fd, daemon->packet, daemon->edns_pktsz, 0, &serveraddr.sa, &addrlen);
480  size_t nn;
481  struct server *server;
482
483  /* packet buffer overwritten */
484  daemon->srv_save = NULL;
485
486  /* Determine the address of the server replying  so that we can mark that as good */
487  serveraddr.sa.sa_family = family;
488#ifdef HAVE_IPV6
489  if (serveraddr.sa.sa_family == AF_INET6)
490    serveraddr.in6.sin6_flowinfo = 0;
491#endif
492
493  /* spoof check: answer must come from known server, */
494  for (server = daemon->servers; server; server = server->next)
495    if (!(server->flags & (SERV_LITERAL_ADDRESS | SERV_NO_ADDR)) &&
496	sockaddr_isequal(&server->addr, &serveraddr))
497      break;
498
499  header = (HEADER *)daemon->packet;
500
501  if (!server ||
502      n < (int)sizeof(HEADER) || !header->qr ||
503      !(forward = lookup_frec(ntohs(header->id), questions_crc(header, n, daemon->namebuff))))
504    return;
505
506  server = forward->sentto;
507
508  if ((header->rcode == SERVFAIL || header->rcode == REFUSED) &&
509      !(daemon->options & OPT_ORDER) &&
510      forward->forwardall == 0)
511    /* for broken servers, attempt to send to another one. */
512    {
513      unsigned char *pheader;
514      size_t plen;
515      int is_sign;
516
517      /* recreate query from reply */
518      pheader = find_pseudoheader(header, (size_t)n, &plen, NULL, &is_sign);
519      if (!is_sign)
520	{
521	  header->ancount = htons(0);
522	  header->nscount = htons(0);
523	  header->arcount = htons(0);
524	  if ((nn = resize_packet(header, (size_t)n, pheader, plen)))
525	    {
526	      header->qr = 0;
527	      header->tc = 0;
528	      forward_query(-1, NULL, NULL, 0, header, nn, now, forward);
529	      return;
530	    }
531	}
532    }
533
534  if ((forward->sentto->flags & SERV_TYPE) == 0)
535    {
536      if (header->rcode == SERVFAIL || header->rcode == REFUSED)
537	server = NULL;
538      else
539	{
540	  struct server *last_server;
541
542	  /* find good server by address if possible, otherwise assume the last one we sent to */
543	  for (last_server = daemon->servers; last_server; last_server = last_server->next)
544	    if (!(last_server->flags & (SERV_LITERAL_ADDRESS | SERV_HAS_DOMAIN | SERV_FOR_NODOTS | SERV_NO_ADDR)) &&
545		sockaddr_isequal(&last_server->addr, &serveraddr))
546	      {
547		server = last_server;
548		break;
549	      }
550	}
551      if (!(daemon->options & OPT_ALL_SERVERS))
552	daemon->last_server = server;
553    }
554
555  /* If the answer is an error, keep the forward record in place in case
556     we get a good reply from another server. Kill it when we've
557     had replies from all to avoid filling the forwarding table when
558     everything is broken */
559  if (forward->forwardall == 0 || --forward->forwardall == 1 ||
560      (header->rcode != REFUSED && header->rcode != SERVFAIL))
561    {
562      if ((nn = process_reply(header, now, server, (size_t)n)))
563	{
564	  header->id = htons(forward->orig_id);
565	  header->ra = 1; /* recursion if available */
566	  send_from(forward->fd, daemon->options & OPT_NOWILD, daemon->packet, nn,
567		    &forward->source, &forward->dest, forward->iface);
568	}
569      free_frec(forward); /* cancel */
570    }
571}
572
573
574void receive_query(struct listener *listen, time_t now)
575{
576  HEADER *header = (HEADER *)daemon->packet;
577  union mysockaddr source_addr;
578  unsigned short type;
579  struct all_addr dst_addr;
580  struct in_addr netmask, dst_addr_4;
581  size_t m;
582  ssize_t n;
583  int if_index = 0;
584  struct iovec iov[1];
585  struct msghdr msg;
586  struct cmsghdr *cmptr;
587  union {
588    struct cmsghdr align; /* this ensures alignment */
589#ifdef HAVE_IPV6
590    char control6[CMSG_SPACE(sizeof(struct in6_pktinfo))];
591#endif
592#if defined(HAVE_LINUX_NETWORK)
593    char control[CMSG_SPACE(sizeof(struct in_pktinfo))];
594#elif defined(IP_RECVDSTADDR) && defined(HAVE_SOLARIS_NETWORK)
595    char control[CMSG_SPACE(sizeof(struct in_addr)) +
596		 CMSG_SPACE(sizeof(unsigned int))];
597#elif defined(IP_RECVDSTADDR)
598    char control[CMSG_SPACE(sizeof(struct in_addr)) +
599		 CMSG_SPACE(sizeof(struct sockaddr_dl))];
600#endif
601  } control_u;
602
603  /* packet buffer overwritten */
604  daemon->srv_save = NULL;
605
606  if (listen->family == AF_INET && (daemon->options & OPT_NOWILD))
607    {
608      dst_addr_4 = listen->iface->addr.in.sin_addr;
609      netmask = listen->iface->netmask;
610    }
611  else
612    {
613      dst_addr_4.s_addr = 0;
614      netmask.s_addr = 0;
615    }
616
617  iov[0].iov_base = daemon->packet;
618  iov[0].iov_len = daemon->edns_pktsz;
619
620  msg.msg_control = control_u.control;
621  msg.msg_controllen = sizeof(control_u);
622  msg.msg_flags = 0;
623  msg.msg_name = &source_addr;
624  msg.msg_namelen = sizeof(source_addr);
625  msg.msg_iov = iov;
626  msg.msg_iovlen = 1;
627
628  if ((n = recvmsg(listen->fd, &msg, 0)) == -1)
629    return;
630
631  if (n < (int)sizeof(HEADER) ||
632      (msg.msg_flags & MSG_TRUNC) ||
633      header->qr)
634    return;
635
636  source_addr.sa.sa_family = listen->family;
637#ifdef HAVE_IPV6
638  if (listen->family == AF_INET6)
639    source_addr.in6.sin6_flowinfo = 0;
640#endif
641
642  if (!(daemon->options & OPT_NOWILD))
643    {
644      struct ifreq ifr;
645
646      if (msg.msg_controllen < sizeof(struct cmsghdr))
647	return;
648
649#if defined(HAVE_LINUX_NETWORK)
650      if (listen->family == AF_INET)
651	for (cmptr = CMSG_FIRSTHDR(&msg); cmptr; cmptr = CMSG_NXTHDR(&msg, cmptr))
652	  if (cmptr->cmsg_level == SOL_IP && cmptr->cmsg_type == IP_PKTINFO)
653	    {
654	      dst_addr_4 = dst_addr.addr.addr4 = ((struct in_pktinfo *)CMSG_DATA(cmptr))->ipi_spec_dst;
655	      if_index = ((struct in_pktinfo *)CMSG_DATA(cmptr))->ipi_ifindex;
656	    }
657#elif defined(IP_RECVDSTADDR) && defined(IP_RECVIF)
658      if (listen->family == AF_INET)
659	{
660	  for (cmptr = CMSG_FIRSTHDR(&msg); cmptr; cmptr = CMSG_NXTHDR(&msg, cmptr))
661	    if (cmptr->cmsg_level == IPPROTO_IP && cmptr->cmsg_type == IP_RECVDSTADDR)
662	      dst_addr_4 = dst_addr.addr.addr4 = *((struct in_addr *)CMSG_DATA(cmptr));
663	    else if (cmptr->cmsg_level == IPPROTO_IP && cmptr->cmsg_type == IP_RECVIF)
664#ifdef HAVE_SOLARIS_NETWORK
665	      if_index = *((unsigned int *)CMSG_DATA(cmptr));
666#else
667	      if_index = ((struct sockaddr_dl *)CMSG_DATA(cmptr))->sdl_index;
668#endif
669	}
670#endif
671
672#ifdef HAVE_IPV6
673      if (listen->family == AF_INET6)
674	{
675	  for (cmptr = CMSG_FIRSTHDR(&msg); cmptr; cmptr = CMSG_NXTHDR(&msg, cmptr))
676	    if (cmptr->cmsg_level == IPV6_LEVEL && cmptr->cmsg_type == IPV6_PKTINFO)
677	      {
678		dst_addr.addr.addr6 = ((struct in6_pktinfo *)CMSG_DATA(cmptr))->ipi6_addr;
679		if_index =((struct in6_pktinfo *)CMSG_DATA(cmptr))->ipi6_ifindex;
680	      }
681	}
682#endif
683
684      /* enforce available interface configuration */
685
686      if (!indextoname(listen->fd, if_index, ifr.ifr_name) ||
687	  !iface_check(listen->family, &dst_addr, ifr.ifr_name, &if_index))
688	return;
689
690      if (listen->family == AF_INET &&
691	  (daemon->options & OPT_LOCALISE) &&
692	  ioctl(listen->fd, SIOCGIFNETMASK, &ifr) == -1)
693	return;
694
695      netmask = ((struct sockaddr_in *) &ifr.ifr_addr)->sin_addr;
696    }
697
698  if (extract_request(header, (size_t)n, daemon->namebuff, &type))
699    {
700      char types[20];
701
702      querystr(types, type);
703
704      if (listen->family == AF_INET)
705	log_query(F_QUERY | F_IPV4 | F_FORWARD, daemon->namebuff,
706		  (struct all_addr *)&source_addr.in.sin_addr, types);
707#ifdef HAVE_IPV6
708      else
709	log_query(F_QUERY | F_IPV6 | F_FORWARD, daemon->namebuff,
710		  (struct all_addr *)&source_addr.in6.sin6_addr, types);
711#endif
712    }
713
714  m = answer_request (header, ((char *) header) + PACKETSZ, (size_t)n,
715		      dst_addr_4, netmask, now);
716  if (m >= 1)
717    {
718      send_from(listen->fd, daemon->options & OPT_NOWILD, (char *)header,
719		m, &source_addr, &dst_addr, if_index);
720      daemon->local_answer++;
721    }
722  else if (forward_query(listen->fd, &source_addr, &dst_addr, if_index,
723			 header, (size_t)n, now, NULL))
724    daemon->queries_forwarded++;
725  else
726    daemon->local_answer++;
727}
728
729/* The daemon forks before calling this: it should deal with one connection,
730   blocking as neccessary, and then return. Note, need to be a bit careful
731   about resources for debug mode, when the fork is suppressed: that's
732   done by the caller. */
733unsigned char *tcp_request(int confd, time_t now,
734			   struct in_addr local_addr, struct in_addr netmask)
735{
736  int size = 0;
737  size_t m;
738  unsigned short qtype, gotname;
739  unsigned char c1, c2;
740  /* Max TCP packet + slop */
741  unsigned char *packet = whine_malloc(65536 + MAXDNAME + RRFIXEDSZ);
742  HEADER *header;
743  struct server *last_server;
744
745  while (1)
746    {
747      if (!packet ||
748	  !read_write(confd, &c1, 1, 1) || !read_write(confd, &c2, 1, 1) ||
749	  !(size = c1 << 8 | c2) ||
750	  !read_write(confd, packet, size, 1))
751       	return packet;
752
753      if (size < (int)sizeof(HEADER))
754	continue;
755
756      header = (HEADER *)packet;
757
758      if ((gotname = extract_request(header, (unsigned int)size, daemon->namebuff, &qtype)))
759	{
760	  union mysockaddr peer_addr;
761	  socklen_t peer_len = sizeof(union mysockaddr);
762
763	  if (getpeername(confd, (struct sockaddr *)&peer_addr, &peer_len) != -1)
764	    {
765	      char types[20];
766
767	      querystr(types, qtype);
768
769	      if (peer_addr.sa.sa_family == AF_INET)
770		log_query(F_QUERY | F_IPV4 | F_FORWARD, daemon->namebuff,
771			  (struct all_addr *)&peer_addr.in.sin_addr, types);
772#ifdef HAVE_IPV6
773	      else
774		log_query(F_QUERY | F_IPV6 | F_FORWARD, daemon->namebuff,
775			  (struct all_addr *)&peer_addr.in6.sin6_addr, types);
776#endif
777	    }
778	}
779
780      /* m > 0 if answered from cache */
781      m = answer_request(header, ((char *) header) + 65536, (unsigned int)size,
782			 local_addr, netmask, now);
783
784      /* Do this by steam now we're not in the select() loop */
785      check_log_writer(NULL);
786
787      if (m == 0)
788	{
789	  unsigned short flags = 0;
790	  struct all_addr *addrp = NULL;
791	  int type = 0;
792	  char *domain = NULL;
793
794	  if (gotname)
795	    flags = search_servers(now, &addrp, gotname, daemon->namebuff, &type, &domain);
796
797	  if (type != 0  || (daemon->options & OPT_ORDER) || !daemon->last_server)
798	    last_server = daemon->servers;
799	  else
800	    last_server = daemon->last_server;
801
802	  if (!flags && last_server)
803	    {
804	      struct server *firstsendto = NULL;
805	      unsigned int crc = questions_crc(header, (unsigned int)size, daemon->namebuff);
806
807	      /* Loop round available servers until we succeed in connecting to one.
808	         Note that this code subtley ensures that consecutive queries on this connection
809	         which can go to the same server, do so. */
810	      while (1)
811		{
812		  if (!firstsendto)
813		    firstsendto = last_server;
814		  else
815		    {
816		      if (!(last_server = last_server->next))
817			last_server = daemon->servers;
818
819		      if (last_server == firstsendto)
820			break;
821		    }
822
823		  /* server for wrong domain */
824		  if (type != (last_server->flags & SERV_TYPE) ||
825		      (type == SERV_HAS_DOMAIN && !hostname_isequal(domain, last_server->domain)))
826		    continue;
827
828		  if ((last_server->tcpfd == -1) &&
829		      (last_server->tcpfd = socket(last_server->addr.sa.sa_family, SOCK_STREAM, 0)) != -1 &&
830		      (!local_bind(last_server->tcpfd, &last_server->source_addr,
831				   last_server->interface, last_server->mark, 1) ||
832		       connect(last_server->tcpfd, &last_server->addr.sa, sa_len(&last_server->addr)) == -1))
833		    {
834		      close(last_server->tcpfd);
835		      last_server->tcpfd = -1;
836		    }
837
838		  if (last_server->tcpfd == -1)
839		    continue;
840
841		  c1 = size >> 8;
842		  c2 = size;
843
844		  if (!read_write(last_server->tcpfd, &c1, 1, 0) ||
845		      !read_write(last_server->tcpfd, &c2, 1, 0) ||
846		      !read_write(last_server->tcpfd, packet, size, 0) ||
847		      !read_write(last_server->tcpfd, &c1, 1, 1) ||
848		      !read_write(last_server->tcpfd, &c2, 1, 1))
849		    {
850		      close(last_server->tcpfd);
851		      last_server->tcpfd = -1;
852		      continue;
853		    }
854
855		  m = (c1 << 8) | c2;
856		  if (!read_write(last_server->tcpfd, packet, m, 1))
857		    return packet;
858
859		  if (!gotname)
860		    strcpy(daemon->namebuff, "query");
861		  if (last_server->addr.sa.sa_family == AF_INET)
862		    log_query(F_SERVER | F_IPV4 | F_FORWARD, daemon->namebuff,
863			      (struct all_addr *)&last_server->addr.in.sin_addr, NULL);
864#ifdef HAVE_IPV6
865		  else
866		    log_query(F_SERVER | F_IPV6 | F_FORWARD, daemon->namebuff,
867			      (struct all_addr *)&last_server->addr.in6.sin6_addr, NULL);
868#endif
869
870		  /* There's no point in updating the cache, since this process will exit and
871		     lose the information after a few queries. We make this call for the alias and
872		     bogus-nxdomain side-effects. */
873		  /* If the crc of the question section doesn't match the crc we sent, then
874		     someone might be attempting to insert bogus values into the cache by
875		     sending replies containing questions and bogus answers. */
876		  if (crc == questions_crc(header, (unsigned int)m, daemon->namebuff))
877		    m = process_reply(header, now, last_server, (unsigned int)m);
878
879		  break;
880		}
881	    }
882
883	  /* In case of local answer or no connections made. */
884	  if (m == 0)
885	    m = setup_reply(header, (unsigned int)size, addrp, flags, daemon->local_ttl);
886	}
887
888      check_log_writer(NULL);
889
890      c1 = m>>8;
891      c2 = m;
892      if (!read_write(confd, &c1, 1, 0) ||
893	  !read_write(confd, &c2, 1, 0) ||
894	  !read_write(confd, packet, m, 0))
895	return packet;
896    }
897}
898
899static struct frec *allocate_frec(time_t now)
900{
901  struct frec *f;
902
903  if ((f = (struct frec *)whine_malloc(sizeof(struct frec))))
904    {
905      f->next = daemon->frec_list;
906      f->time = now;
907      f->sentto = NULL;
908      f->rfd4 = NULL;
909#ifdef HAVE_IPV6
910      f->rfd6 = NULL;
911#endif
912      daemon->frec_list = f;
913    }
914
915  return f;
916}
917
918static struct randfd *allocate_rfd(int family)
919{
920  static int finger = 0;
921  int i;
922
923  /* limit the number of sockets we have open to avoid starvation of
924     (eg) TFTP. Once we have a reasonable number, randomness should be OK */
925
926  for (i = 0; i < RANDOM_SOCKS; i++)
927    if (daemon->randomsocks[i].refcount == 0)
928      {
929	if ((daemon->randomsocks[i].fd = random_sock(family)) == -1)
930	  break;
931
932	daemon->randomsocks[i].refcount = 1;
933	daemon->randomsocks[i].family = family;
934	return &daemon->randomsocks[i];
935      }
936
937  /* No free ones or cannot get new socket, grab an existing one */
938  for (i = 0; i < RANDOM_SOCKS; i++)
939    {
940      int j = (i+finger) % RANDOM_SOCKS;
941      if (daemon->randomsocks[j].refcount != 0 &&
942	  daemon->randomsocks[j].family == family &&
943	  daemon->randomsocks[j].refcount != 0xffff)
944	{
945	  finger = j;
946	  daemon->randomsocks[j].refcount++;
947	  return &daemon->randomsocks[j];
948	}
949    }
950
951  return NULL; /* doom */
952}
953
954static void free_frec(struct frec *f)
955{
956  if (f->rfd4 && --(f->rfd4->refcount) == 0)
957    close(f->rfd4->fd);
958
959  f->rfd4 = NULL;
960  f->sentto = NULL;
961
962#ifdef HAVE_IPV6
963  if (f->rfd6 && --(f->rfd6->refcount) == 0)
964    close(f->rfd6->fd);
965
966  f->rfd6 = NULL;
967#endif
968}
969
970/* if wait==NULL return a free or older than TIMEOUT record.
971   else return *wait zero if one available, or *wait is delay to
972   when the oldest in-use record will expire. Impose an absolute
973   limit of 4*TIMEOUT before we wipe things (for random sockets) */
974struct frec *get_new_frec(time_t now, int *wait)
975{
976  struct frec *f, *oldest, *target;
977  int count;
978
979  if (wait)
980    *wait = 0;
981
982  for (f = daemon->frec_list, oldest = NULL, target =  NULL, count = 0; f; f = f->next, count++)
983    if (!f->sentto)
984      target = f;
985    else
986      {
987	if (difftime(now, f->time) >= 4*TIMEOUT)
988	  {
989	    free_frec(f);
990	    target = f;
991	  }
992
993	if (!oldest || difftime(f->time, oldest->time) <= 0)
994	  oldest = f;
995      }
996
997  if (target)
998    {
999      target->time = now;
1000      return target;
1001    }
1002
1003  /* can't find empty one, use oldest if there is one
1004     and it's older than timeout */
1005  if (oldest && ((int)difftime(now, oldest->time)) >= TIMEOUT)
1006    {
1007      /* keep stuff for twice timeout if we can by allocating a new
1008	 record instead */
1009      if (difftime(now, oldest->time) < 2*TIMEOUT &&
1010	  count <= daemon->ftabsize &&
1011	  (f = allocate_frec(now)))
1012	return f;
1013
1014      if (!wait)
1015	{
1016	  free_frec(oldest);
1017	  oldest->time = now;
1018	}
1019      return oldest;
1020    }
1021
1022  /* none available, calculate time 'till oldest record expires */
1023  if (count > daemon->ftabsize)
1024    {
1025      if (oldest && wait)
1026	*wait = oldest->time + (time_t)TIMEOUT - now;
1027      return NULL;
1028    }
1029
1030  if (!(f = allocate_frec(now)) && wait)
1031    /* wait one second on malloc failure */
1032    *wait = 1;
1033
1034  return f; /* OK if malloc fails and this is NULL */
1035}
1036
1037/* crc is all-ones if not known. */
1038static struct frec *lookup_frec(unsigned short id, unsigned int crc)
1039{
1040  struct frec *f;
1041
1042  for(f = daemon->frec_list; f; f = f->next)
1043    if (f->sentto && f->new_id == id &&
1044	(f->crc == crc || crc == 0xffffffff))
1045      return f;
1046
1047  return NULL;
1048}
1049
1050static struct frec *lookup_frec_by_sender(unsigned short id,
1051					  union mysockaddr *addr,
1052					  unsigned int crc)
1053{
1054  struct frec *f;
1055
1056  for(f = daemon->frec_list; f; f = f->next)
1057    if (f->sentto &&
1058	f->orig_id == id &&
1059	f->crc == crc &&
1060	sockaddr_isequal(&f->source, addr))
1061      return f;
1062
1063  return NULL;
1064}
1065
1066/* A server record is going away, remove references to it */
1067void server_gone(struct server *server)
1068{
1069  struct frec *f;
1070
1071  for (f = daemon->frec_list; f; f = f->next)
1072    if (f->sentto && f->sentto == server)
1073      free_frec(f);
1074
1075  if (daemon->last_server == server)
1076    daemon->last_server = NULL;
1077
1078  if (daemon->srv_save == server)
1079    daemon->srv_save = NULL;
1080}
1081
1082/* return unique random ids.
1083   For signed packets we can't change the ID without breaking the
1084   signing, so we keep the same one. In this case force is set, and this
1085   routine degenerates into killing any conflicting forward record. */
1086static unsigned short get_id(int force, unsigned short force_id, unsigned int crc)
1087{
1088  unsigned short ret = 0;
1089
1090  if (force)
1091    {
1092      struct frec *f = lookup_frec(force_id, crc);
1093      if (f)
1094	free_frec(f); /* free */
1095      ret = force_id;
1096    }
1097  else do
1098    ret = rand16();
1099  while (lookup_frec(ret, crc));
1100
1101  return ret;
1102}
1103
1104
1105
1106
1107
1108