1/* dnsmasq is Copyright (c) 2000-2009 Simon Kelley
2
3   This program is free software; you can redistribute it and/or modify
4   it under the terms of the GNU General Public License as published by
5   the Free Software Foundation; version 2 dated June, 1991, or
6   (at your option) version 3 dated 29 June, 2007.
7
8   This program is distributed in the hope that it will be useful,
9   but WITHOUT ANY WARRANTY; without even the implied warranty of
10   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11   GNU General Public License for more details.
12
13   You should have received a copy of the GNU General Public License
14   along with this program.  If not, see <http://www.gnu.org/licenses/>.
15*/
16
17#include "dnsmasq.h"
18
/* Forward declarations for file-local helpers defined later in this file. */

/* Find the in-use forwarding record whose upstream id and question crc match. */
static struct frec *lookup_frec(unsigned short id, unsigned int crc);
/* Find the in-use forwarding record matching a client's original id,
   source address and question crc. */
static struct frec *lookup_frec_by_sender(unsigned short id,
					  union mysockaddr *addr,
					  unsigned int crc);
/* Choose the id to put on the query sent upstream. */
static unsigned short get_id(int force, unsigned short force_id, unsigned int crc);
/* Drop a forwarding record's random-socket references and mark it unused. */
static void free_frec(struct frec *f);
/* Obtain a (possibly shared) random socket for the given address family. */
static struct randfd *allocate_rfd(int family);
26
27/* Send a UDP packet with its source address set as "source"
28   unless nowild is true, when we just send it with the kernel default */
29static void send_from(int fd, int nowild, char *packet, size_t len,
30		      union mysockaddr *to, struct all_addr *source,
31		      unsigned int iface)
32{
33  struct msghdr msg;
34  struct iovec iov[1];
35  union {
36    struct cmsghdr align; /* this ensures alignment */
37#if defined(HAVE_LINUX_NETWORK)
38    char control[CMSG_SPACE(sizeof(struct in_pktinfo))];
39#elif defined(IP_SENDSRCADDR)
40    char control[CMSG_SPACE(sizeof(struct in_addr))];
41#endif
42#ifdef HAVE_IPV6
43    char control6[CMSG_SPACE(sizeof(struct in6_pktinfo))];
44#endif
45  } control_u;
46
47  iov[0].iov_base = packet;
48  iov[0].iov_len = len;
49
50  msg.msg_control = NULL;
51  msg.msg_controllen = 0;
52  msg.msg_flags = 0;
53  msg.msg_name = to;
54  msg.msg_namelen = sa_len(to);
55  msg.msg_iov = iov;
56  msg.msg_iovlen = 1;
57
58  if (!nowild)
59    {
60      struct cmsghdr *cmptr;
61      msg.msg_control = &control_u;
62      msg.msg_controllen = sizeof(control_u);
63      cmptr = CMSG_FIRSTHDR(&msg);
64
65      if (to->sa.sa_family == AF_INET)
66	{
67#if defined(HAVE_LINUX_NETWORK)
68	  struct in_pktinfo *pkt = (struct in_pktinfo *)CMSG_DATA(cmptr);
69	  pkt->ipi_ifindex = 0;
70	  pkt->ipi_spec_dst = source->addr.addr4;
71	  msg.msg_controllen = cmptr->cmsg_len = CMSG_LEN(sizeof(struct in_pktinfo));
72	  cmptr->cmsg_level = SOL_IP;
73	  cmptr->cmsg_type = IP_PKTINFO;
74#elif defined(IP_SENDSRCADDR)
75	  struct in_addr *a = (struct in_addr *)CMSG_DATA(cmptr);
76	  *a = source->addr.addr4;
77	  msg.msg_controllen = cmptr->cmsg_len = CMSG_LEN(sizeof(struct in_addr));
78	  cmptr->cmsg_level = IPPROTO_IP;
79	  cmptr->cmsg_type = IP_SENDSRCADDR;
80#endif
81	}
82      else
83#ifdef HAVE_IPV6
84	{
85	  struct in6_pktinfo *pkt = (struct in6_pktinfo *)CMSG_DATA(cmptr);
86	  pkt->ipi6_ifindex = iface; /* Need iface for IPv6 to handle link-local addrs */
87	  pkt->ipi6_addr = source->addr.addr6;
88	  msg.msg_controllen = cmptr->cmsg_len = CMSG_LEN(sizeof(struct in6_pktinfo));
89	  cmptr->cmsg_type = IPV6_PKTINFO;
90	  cmptr->cmsg_level = IPV6_LEVEL;
91	}
92#else
93      iface = 0; /* eliminate warning */
94#endif
95    }
96
97 retry:
98  if (sendmsg(fd, &msg, 0) == -1)
99    {
100      /* certain Linux kernels seem to object to setting the source address in the IPv6 stack
101	 by returning EINVAL from sendmsg. In that case, try again without setting the
102	 source address, since it will nearly alway be correct anyway.  IPv6 stinks. */
103      if (errno == EINVAL && msg.msg_controllen)
104	{
105	  msg.msg_controllen = 0;
106	  goto retry;
107	}
108      if (retry_send())
109	goto retry;
110    }
111}
112
113static unsigned short search_servers(time_t now, struct all_addr **addrpp,
114				     unsigned short qtype, char *qdomain, int *type, char **domain)
115
116{
117  /* If the query ends in the domain in one of our servers, set
118     domain to point to that name. We find the largest match to allow both
119     domain.org and sub.domain.org to exist. */
120
121  unsigned int namelen = strlen(qdomain);
122  unsigned int matchlen = 0;
123  struct server *serv;
124  unsigned short flags = 0;
125
126  for (serv = daemon->servers; serv; serv=serv->next)
127    /* domain matches take priority over NODOTS matches */
128    if ((serv->flags & SERV_FOR_NODOTS) && *type != SERV_HAS_DOMAIN && !strchr(qdomain, '.') && namelen != 0)
129      {
130	unsigned short sflag = serv->addr.sa.sa_family == AF_INET ? F_IPV4 : F_IPV6;
131	*type = SERV_FOR_NODOTS;
132	if (serv->flags & SERV_NO_ADDR)
133	  flags = F_NXDOMAIN;
134	else if (serv->flags & SERV_LITERAL_ADDRESS)
135	  {
136	    if (sflag & qtype)
137	      {
138		flags = sflag;
139		if (serv->addr.sa.sa_family == AF_INET)
140		  *addrpp = (struct all_addr *)&serv->addr.in.sin_addr;
141#ifdef HAVE_IPV6
142		else
143		  *addrpp = (struct all_addr *)&serv->addr.in6.sin6_addr;
144#endif
145	      }
146	    else if (!flags || (flags & F_NXDOMAIN))
147	      flags = F_NOERR;
148	  }
149      }
150    else if (serv->flags & SERV_HAS_DOMAIN)
151      {
152	unsigned int domainlen = strlen(serv->domain);
153	char *matchstart = qdomain + namelen - domainlen;
154	if (namelen >= domainlen &&
155	    hostname_isequal(matchstart, serv->domain) &&
156	    domainlen >= matchlen &&
157	    (domainlen == 0 || namelen == domainlen || *(serv->domain) == '.' || *(matchstart-1) == '.' ))
158	  {
159	    unsigned short sflag = serv->addr.sa.sa_family == AF_INET ? F_IPV4 : F_IPV6;
160	    *type = SERV_HAS_DOMAIN;
161	    *domain = serv->domain;
162	    matchlen = domainlen;
163	    if (serv->flags & SERV_NO_ADDR)
164	      flags = F_NXDOMAIN;
165	    else if (serv->flags & SERV_LITERAL_ADDRESS)
166	      {
167		if (sflag & qtype)
168		  {
169		    flags = sflag;
170		    if (serv->addr.sa.sa_family == AF_INET)
171		      *addrpp = (struct all_addr *)&serv->addr.in.sin_addr;
172#ifdef HAVE_IPV6
173		    else
174		      *addrpp = (struct all_addr *)&serv->addr.in6.sin6_addr;
175#endif
176		  }
177		else if (!flags || (flags & F_NXDOMAIN))
178		  flags = F_NOERR;
179	      }
180	  }
181      }
182
183  if (flags == 0 && !(qtype & F_BIGNAME) &&
184      (daemon->options & OPT_NODOTS_LOCAL) && !strchr(qdomain, '.') && namelen != 0)
185    /* don't forward simple names, make exception for NS queries and empty name. */
186    flags = F_NXDOMAIN;
187
188  if (flags == F_NXDOMAIN && check_for_local_domain(qdomain, now))
189    flags = F_NOERR;
190
191  if (flags)
192    {
193      int logflags = 0;
194
195      if (flags == F_NXDOMAIN || flags == F_NOERR)
196	logflags = F_NEG | qtype;
197
198      log_query(logflags | flags | F_CONFIG | F_FORWARD, qdomain, *addrpp, NULL);
199    }
200
201  return  flags;
202}
203
/* Forward a query upstream, or reply to the client directly if it cannot
   (or need not) be forwarded.

   udpfd      socket the query arrived on; -1 means no reply is sent to a
              client from here (reply_query() calls it this way when
              re-forwarding)
   udpaddr    address of the client
   dst_addr   local address the query was sent to
   dst_iface  interface index the query arrived on
   header     the query packet, plen bytes long; its id is rewritten
   now        current time
   forward    existing forwarding record to retry, or NULL

   Returns 1 if the query was sent to at least one server, 0 otherwise. */
static int forward_query(int udpfd, union mysockaddr *udpaddr,
			 struct all_addr *dst_addr, unsigned int dst_iface,
			 HEADER *header, size_t plen, time_t now, struct frec *forward)
{
  char *domain = NULL;
  int type = 0;
  struct all_addr *addrp = NULL;
  unsigned int crc = questions_crc(header, plen, daemon->namebuff);
  unsigned short flags = 0;
  unsigned short gotname = extract_request(header, plen, daemon->namebuff, NULL);
  struct server *start = NULL;

  /* may be no servers available. */
  if (!daemon->servers)
    forward = NULL;
  else if (forward || (forward = lookup_frec_by_sender(ntohs(header->id), udpaddr, crc)))
    {
      /* retry on existing query, send to all available servers  */
      domain = forward->sentto->domain;
      forward->sentto->failed_queries++;
      if (!(daemon->options & OPT_ORDER))
	{
	  forward->forwardall = 1;
	  daemon->last_server = NULL;
	}
      type = forward->sentto->flags & SERV_TYPE;
      /* start with the server after the one used last time */
      if (!(start = forward->sentto->next))
	start = daemon->servers; /* at end of list, recycle */
      header->id = htons(forward->new_id);
    }
  else
    {
      /* new query: see whether the configuration answers it locally */
      if (gotname)
	flags = search_servers(now, &addrp, gotname, daemon->namebuff, &type, &domain);

      if (!flags && !(forward = get_new_frec(now, NULL)))
	/* table full - server failure. */
	flags = F_NEG;

      if (forward)
	{
	  /* force unchanging id for signed packets */
	  int is_sign;
	  find_pseudoheader(header, plen, NULL, NULL, &is_sign);

	  /* remember where the query came from so the reply can be routed back */
	  forward->source = *udpaddr;
	  forward->dest = *dst_addr;
	  forward->iface = dst_iface;
	  forward->orig_id = ntohs(header->id);
	  forward->new_id = get_id(is_sign, forward->orig_id, crc);
	  forward->fd = udpfd;
	  forward->crc = crc;
	  forward->forwardall = 0;
	  header->id = htons(forward->new_id);

	  /* In strict_order mode, or when using domain specific servers
	     always try servers in the order specified in resolv.conf,
	     otherwise, use the one last known to work. */

	  if (type != 0  || (daemon->options & OPT_ORDER))
	    start = daemon->servers;
	  else if (!(start = daemon->last_server) ||
		   daemon->forwardcount++ > FORWARD_TEST ||
		   difftime(now, daemon->forwardtime) > FORWARD_TIME)
	    {
	      /* no known-good server, or time to re-test: send to all of them */
	      start = daemon->servers;
	      forward->forwardall = 1;
	      daemon->forwardcount = 0;
	      daemon->forwardtime = now;
	    }
	}
    }

  /* check for send errors here (no route to host)
     if we fail to send to all nameservers, send back an error
     packet straight away (helps modem users when offline)  */

  if (!flags && forward)
    {
      struct server *firstsentto = start;
      int forwarded = 0;

      /* walk the server list circularly, starting and ending at firstsentto */
      while (1)
	{
	  /* only send to servers dealing with our domain.
	     domain may be NULL, in which case server->domain
	     must be NULL also. */

	  if (type == (start->flags & SERV_TYPE) &&
	      (type != SERV_HAS_DOMAIN || hostname_isequal(domain, start->domain)) &&
	      !(start->flags & SERV_LITERAL_ADDRESS))
	    {
	      int fd;

	      /* find server socket to use, may need to get random one. */
	      if (start->sfd)
		fd = start->sfd->fd;
	      else
		{
#ifdef HAVE_IPV6
		  if (start->addr.sa.sa_family == AF_INET6)
		    {
		      if (!forward->rfd6 &&
			  !(forward->rfd6 = allocate_rfd(AF_INET6)))
			break;
		      daemon->rfd_save = forward->rfd6;
		      fd = forward->rfd6->fd;
		    }
		  else
#endif
		    {
		      if (!forward->rfd4 &&
			  !(forward->rfd4 = allocate_rfd(AF_INET)))
			break;
		      daemon->rfd_save = forward->rfd4;
		      fd = forward->rfd4->fd;
		    }
		}

	      if (sendto(fd, (char *)header, plen, 0,
			 &start->addr.sa,
			 sa_len(&start->addr)) == -1)
		{
		  /* retry_send() true: go round again without advancing,
		     i.e. try the same server once more */
		  if (retry_send())
		    continue;
		}
	      else
		{
		  /* Keep info in case we want to re-send this packet */
		  daemon->srv_save = start;
		  daemon->packet_len = plen;

		  if (!gotname)
		    strcpy(daemon->namebuff, "query");
		  if (start->addr.sa.sa_family == AF_INET)
		    log_query(F_SERVER | F_IPV4 | F_FORWARD, daemon->namebuff,
			      (struct all_addr *)&start->addr.in.sin_addr, NULL);
#ifdef HAVE_IPV6
		  else
		    log_query(F_SERVER | F_IPV6 | F_FORWARD, daemon->namebuff,
			      (struct all_addr *)&start->addr.in6.sin6_addr, NULL);
#endif
		  start->queries++;
		  forwarded = 1;
		  forward->sentto = start;
		  if (!forward->forwardall)
		    break;
		  /* forwardall also counts the servers sent to;
		     reply_query() decrements it as replies arrive */
		  forward->forwardall++;
		}
	    }

	  if (!(start = start->next))
 	    start = daemon->servers;

	  if (start == firstsentto)
	    break;
	}

      if (forwarded)
	return 1;

      /* could not send on, prepare to return */
      header->id = htons(forward->orig_id);
      free_frec(forward); /* cancel */
    }

  /* could not send on, return empty answer or address if known for whole domain */
  if (udpfd != -1)
    {
      plen = setup_reply(header, plen, addrp, flags, daemon->local_ttl);
      send_from(udpfd, daemon->options & OPT_NOWILD, (char *)header, plen, udpaddr, dst_addr, dst_iface);
    }

  return 0;
}
379
380static size_t process_reply(HEADER *header, time_t now,
381			    struct server *server, size_t n)
382{
383  unsigned char *pheader, *sizep;
384  int munged = 0, is_sign;
385  size_t plen;
386
387  /* If upstream is advertising a larger UDP packet size
388     than we allow, trim it so that we don't get overlarge
389     requests for the client. We can't do this for signed packets. */
390
391  if ((pheader = find_pseudoheader(header, n, &plen, &sizep, &is_sign)) && !is_sign)
392    {
393      unsigned short udpsz;
394      unsigned char *psave = sizep;
395
396      GETSHORT(udpsz, sizep);
397      if (udpsz > daemon->edns_pktsz)
398	PUTSHORT(daemon->edns_pktsz, psave);
399    }
400
401  if (header->opcode != QUERY || (header->rcode != NOERROR && header->rcode != NXDOMAIN))
402    return n;
403
404  /* Complain loudly if the upstream server is non-recursive. */
405  if (!header->ra && header->rcode == NOERROR && ntohs(header->ancount) == 0 &&
406      server && !(server->flags & SERV_WARNED_RECURSIVE))
407    {
408      prettyprint_addr(&server->addr, daemon->namebuff);
409      my_syslog(LOG_WARNING, _("nameserver %s refused to do a recursive query"), daemon->namebuff);
410      if (!(daemon->options & OPT_LOG))
411	server->flags |= SERV_WARNED_RECURSIVE;
412    }
413
414  if (daemon->bogus_addr && header->rcode != NXDOMAIN &&
415      check_for_bogus_wildcard(header, n, daemon->namebuff, daemon->bogus_addr, now))
416    {
417      munged = 1;
418      header->rcode = NXDOMAIN;
419      header->aa = 0;
420    }
421  else
422    {
423      if (header->rcode == NXDOMAIN &&
424	  extract_request(header, n, daemon->namebuff, NULL) &&
425	  check_for_local_domain(daemon->namebuff, now))
426	{
427	  /* if we forwarded a query for a locally known name (because it was for
428	     an unknown type) and the answer is NXDOMAIN, convert that to NODATA,
429	     since we know that the domain exists, even if upstream doesn't */
430	  munged = 1;
431	  header->aa = 1;
432	  header->rcode = NOERROR;
433	}
434
435      if (extract_addresses(header, n, daemon->namebuff, now))
436	{
437	  my_syslog(LOG_WARNING, _("possible DNS-rebind attack detected"));
438	  munged = 1;
439	}
440    }
441
442  /* do this after extract_addresses. Ensure NODATA reply and remove
443     nameserver info. */
444
445  if (munged)
446    {
447      header->ancount = htons(0);
448      header->nscount = htons(0);
449      header->arcount = htons(0);
450    }
451
452  /* the bogus-nxdomain stuff, doctor and NXDOMAIN->NODATA munging can all elide
453     sections of the packet. Find the new length here and put back pseudoheader
454     if it was removed. */
455  return resize_packet(header, n, pheader, plen);
456}
457
458/* sets new last_server */
459void reply_query(int fd, int family, time_t now)
460{
461  /* packet from peer server, extract data for cache, and send to
462     original requester */
463  HEADER *header;
464  union mysockaddr serveraddr;
465  struct frec *forward;
466  socklen_t addrlen = sizeof(serveraddr);
467  ssize_t n = recvfrom(fd, daemon->packet, daemon->edns_pktsz, 0, &serveraddr.sa, &addrlen);
468  size_t nn;
469  struct server *server;
470
471  /* packet buffer overwritten */
472  daemon->srv_save = NULL;
473
474  /* Determine the address of the server replying  so that we can mark that as good */
475  serveraddr.sa.sa_family = family;
476#ifdef HAVE_IPV6
477  if (serveraddr.sa.sa_family == AF_INET6)
478    serveraddr.in6.sin6_flowinfo = 0;
479#endif
480
481  /* spoof check: answer must come from known server, */
482  for (server = daemon->servers; server; server = server->next)
483    if (!(server->flags & (SERV_LITERAL_ADDRESS | SERV_NO_ADDR)) &&
484	sockaddr_isequal(&server->addr, &serveraddr))
485      break;
486
487  header = (HEADER *)daemon->packet;
488
489  if (!server ||
490      n < (int)sizeof(HEADER) || !header->qr ||
491      !(forward = lookup_frec(ntohs(header->id), questions_crc(header, n, daemon->namebuff))))
492    return;
493
494  server = forward->sentto;
495
496  if ((header->rcode == SERVFAIL || header->rcode == REFUSED) &&
497      !(daemon->options & OPT_ORDER) &&
498      forward->forwardall == 0)
499    /* for broken servers, attempt to send to another one. */
500    {
501      unsigned char *pheader;
502      size_t plen;
503      int is_sign;
504
505      /* recreate query from reply */
506      pheader = find_pseudoheader(header, (size_t)n, &plen, NULL, &is_sign);
507      if (!is_sign)
508	{
509	  header->ancount = htons(0);
510	  header->nscount = htons(0);
511	  header->arcount = htons(0);
512	  if ((nn = resize_packet(header, (size_t)n, pheader, plen)))
513	    {
514	      header->qr = 0;
515	      header->tc = 0;
516	      forward_query(-1, NULL, NULL, 0, header, nn, now, forward);
517	      return;
518	    }
519	}
520    }
521
522  if ((forward->sentto->flags & SERV_TYPE) == 0)
523    {
524      if (header->rcode == SERVFAIL || header->rcode == REFUSED)
525	server = NULL;
526      else
527	{
528	  struct server *last_server;
529
530	  /* find good server by address if possible, otherwise assume the last one we sent to */
531	  for (last_server = daemon->servers; last_server; last_server = last_server->next)
532	    if (!(last_server->flags & (SERV_LITERAL_ADDRESS | SERV_HAS_DOMAIN | SERV_FOR_NODOTS | SERV_NO_ADDR)) &&
533		sockaddr_isequal(&last_server->addr, &serveraddr))
534	      {
535		server = last_server;
536		break;
537	      }
538	}
539      if (!(daemon->options & OPT_ALL_SERVERS))
540	daemon->last_server = server;
541    }
542
543  /* If the answer is an error, keep the forward record in place in case
544     we get a good reply from another server. Kill it when we've
545     had replies from all to avoid filling the forwarding table when
546     everything is broken */
547  if (forward->forwardall == 0 || --forward->forwardall == 1 ||
548      (header->rcode != REFUSED && header->rcode != SERVFAIL))
549    {
550      if ((nn = process_reply(header, now, server, (size_t)n)))
551	{
552	  header->id = htons(forward->orig_id);
553	  header->ra = 1; /* recursion if available */
554	  send_from(forward->fd, daemon->options & OPT_NOWILD, daemon->packet, nn,
555		    &forward->source, &forward->dest, forward->iface);
556	}
557      free_frec(forward); /* cancel */
558    }
559}
560
561
562void receive_query(struct listener *listen, time_t now)
563{
564  HEADER *header = (HEADER *)daemon->packet;
565  union mysockaddr source_addr;
566  unsigned short type;
567  struct all_addr dst_addr;
568  struct in_addr netmask, dst_addr_4;
569  size_t m;
570  ssize_t n;
571  int if_index = 0;
572  struct iovec iov[1];
573  struct msghdr msg;
574  struct cmsghdr *cmptr;
575  union {
576    struct cmsghdr align; /* this ensures alignment */
577#ifdef HAVE_IPV6
578    char control6[CMSG_SPACE(sizeof(struct in6_pktinfo))];
579#endif
580#if defined(HAVE_LINUX_NETWORK)
581    char control[CMSG_SPACE(sizeof(struct in_pktinfo))];
582#elif defined(IP_RECVDSTADDR) && defined(HAVE_SOLARIS_NETWORK)
583    char control[CMSG_SPACE(sizeof(struct in_addr)) +
584		 CMSG_SPACE(sizeof(unsigned int))];
585#elif defined(IP_RECVDSTADDR)
586    char control[CMSG_SPACE(sizeof(struct in_addr)) +
587		 CMSG_SPACE(sizeof(struct sockaddr_dl))];
588#endif
589  } control_u;
590
591  /* packet buffer overwritten */
592  daemon->srv_save = NULL;
593
594  if (listen->family == AF_INET && (daemon->options & OPT_NOWILD))
595    {
596      dst_addr_4 = listen->iface->addr.in.sin_addr;
597      netmask = listen->iface->netmask;
598    }
599  else
600    {
601      dst_addr_4.s_addr = 0;
602      netmask.s_addr = 0;
603    }
604
605  iov[0].iov_base = daemon->packet;
606  iov[0].iov_len = daemon->edns_pktsz;
607
608  msg.msg_control = control_u.control;
609  msg.msg_controllen = sizeof(control_u);
610  msg.msg_flags = 0;
611  msg.msg_name = &source_addr;
612  msg.msg_namelen = sizeof(source_addr);
613  msg.msg_iov = iov;
614  msg.msg_iovlen = 1;
615
616  if ((n = recvmsg(listen->fd, &msg, 0)) == -1)
617    return;
618
619  if (n < (int)sizeof(HEADER) ||
620      (msg.msg_flags & MSG_TRUNC) ||
621      header->qr)
622    return;
623
624  source_addr.sa.sa_family = listen->family;
625#ifdef HAVE_IPV6
626  if (listen->family == AF_INET6)
627    source_addr.in6.sin6_flowinfo = 0;
628#endif
629
630  if (!(daemon->options & OPT_NOWILD))
631    {
632      struct ifreq ifr;
633
634      if (msg.msg_controllen < sizeof(struct cmsghdr))
635	return;
636
637#if defined(HAVE_LINUX_NETWORK)
638      if (listen->family == AF_INET)
639	for (cmptr = CMSG_FIRSTHDR(&msg); cmptr; cmptr = CMSG_NXTHDR(&msg, cmptr))
640	  if (cmptr->cmsg_level == SOL_IP && cmptr->cmsg_type == IP_PKTINFO)
641	    {
642	      dst_addr_4 = dst_addr.addr.addr4 = ((struct in_pktinfo *)CMSG_DATA(cmptr))->ipi_spec_dst;
643	      if_index = ((struct in_pktinfo *)CMSG_DATA(cmptr))->ipi_ifindex;
644	    }
645#elif defined(IP_RECVDSTADDR) && defined(IP_RECVIF)
646      if (listen->family == AF_INET)
647	{
648	  for (cmptr = CMSG_FIRSTHDR(&msg); cmptr; cmptr = CMSG_NXTHDR(&msg, cmptr))
649	    if (cmptr->cmsg_level == IPPROTO_IP && cmptr->cmsg_type == IP_RECVDSTADDR)
650	      dst_addr_4 = dst_addr.addr.addr4 = *((struct in_addr *)CMSG_DATA(cmptr));
651	    else if (cmptr->cmsg_level == IPPROTO_IP && cmptr->cmsg_type == IP_RECVIF)
652#ifdef HAVE_SOLARIS_NETWORK
653	      if_index = *((unsigned int *)CMSG_DATA(cmptr));
654#else
655	      if_index = ((struct sockaddr_dl *)CMSG_DATA(cmptr))->sdl_index;
656#endif
657	}
658#endif
659
660#ifdef HAVE_IPV6
661      if (listen->family == AF_INET6)
662	{
663	  for (cmptr = CMSG_FIRSTHDR(&msg); cmptr; cmptr = CMSG_NXTHDR(&msg, cmptr))
664	    if (cmptr->cmsg_level == IPV6_LEVEL && cmptr->cmsg_type == IPV6_PKTINFO)
665	      {
666		dst_addr.addr.addr6 = ((struct in6_pktinfo *)CMSG_DATA(cmptr))->ipi6_addr;
667		if_index =((struct in6_pktinfo *)CMSG_DATA(cmptr))->ipi6_ifindex;
668	      }
669	}
670#endif
671
672      /* enforce available interface configuration */
673
674      if (!indextoname(listen->fd, if_index, ifr.ifr_name) ||
675	  !iface_check(listen->family, &dst_addr, ifr.ifr_name, &if_index))
676	return;
677
678      if (listen->family == AF_INET &&
679	  (daemon->options & OPT_LOCALISE) &&
680	  ioctl(listen->fd, SIOCGIFNETMASK, &ifr) == -1)
681	return;
682
683      netmask = ((struct sockaddr_in *) &ifr.ifr_addr)->sin_addr;
684    }
685
686  if (extract_request(header, (size_t)n, daemon->namebuff, &type))
687    {
688      char types[20];
689
690      querystr(types, type);
691
692      if (listen->family == AF_INET)
693	log_query(F_QUERY | F_IPV4 | F_FORWARD, daemon->namebuff,
694		  (struct all_addr *)&source_addr.in.sin_addr, types);
695#ifdef HAVE_IPV6
696      else
697	log_query(F_QUERY | F_IPV6 | F_FORWARD, daemon->namebuff,
698		  (struct all_addr *)&source_addr.in6.sin6_addr, types);
699#endif
700    }
701
702  m = answer_request (header, ((char *) header) + PACKETSZ, (size_t)n,
703		      dst_addr_4, netmask, now);
704  if (m >= 1)
705    {
706      send_from(listen->fd, daemon->options & OPT_NOWILD, (char *)header,
707		m, &source_addr, &dst_addr, if_index);
708      daemon->local_answer++;
709    }
710  else if (forward_query(listen->fd, &source_addr, &dst_addr, if_index,
711			 header, (size_t)n, now, NULL))
712    daemon->queries_forwarded++;
713  else
714    daemon->local_answer++;
715}
716
717/* The daemon forks before calling this: it should deal with one connection,
718   blocking as neccessary, and then return. Note, need to be a bit careful
719   about resources for debug mode, when the fork is suppressed: that's
720   done by the caller. */
721unsigned char *tcp_request(int confd, time_t now,
722			   struct in_addr local_addr, struct in_addr netmask)
723{
724  int size = 0;
725  size_t m;
726  unsigned short qtype, gotname;
727  unsigned char c1, c2;
728  /* Max TCP packet + slop */
729  unsigned char *packet = whine_malloc(65536 + MAXDNAME + RRFIXEDSZ);
730  HEADER *header;
731  struct server *last_server;
732
733  while (1)
734    {
735      if (!packet ||
736	  !read_write(confd, &c1, 1, 1) || !read_write(confd, &c2, 1, 1) ||
737	  !(size = c1 << 8 | c2) ||
738	  !read_write(confd, packet, size, 1))
739       	return packet;
740
741      if (size < (int)sizeof(HEADER))
742	continue;
743
744      header = (HEADER *)packet;
745
746      if ((gotname = extract_request(header, (unsigned int)size, daemon->namebuff, &qtype)))
747	{
748	  union mysockaddr peer_addr;
749	  socklen_t peer_len = sizeof(union mysockaddr);
750
751	  if (getpeername(confd, (struct sockaddr *)&peer_addr, &peer_len) != -1)
752	    {
753	      char types[20];
754
755	      querystr(types, qtype);
756
757	      if (peer_addr.sa.sa_family == AF_INET)
758		log_query(F_QUERY | F_IPV4 | F_FORWARD, daemon->namebuff,
759			  (struct all_addr *)&peer_addr.in.sin_addr, types);
760#ifdef HAVE_IPV6
761	      else
762		log_query(F_QUERY | F_IPV6 | F_FORWARD, daemon->namebuff,
763			  (struct all_addr *)&peer_addr.in6.sin6_addr, types);
764#endif
765	    }
766	}
767
768      /* m > 0 if answered from cache */
769      m = answer_request(header, ((char *) header) + 65536, (unsigned int)size,
770			 local_addr, netmask, now);
771
772      /* Do this by steam now we're not in the select() loop */
773      check_log_writer(NULL);
774
775      if (m == 0)
776	{
777	  unsigned short flags = 0;
778	  struct all_addr *addrp = NULL;
779	  int type = 0;
780	  char *domain = NULL;
781
782	  if (gotname)
783	    flags = search_servers(now, &addrp, gotname, daemon->namebuff, &type, &domain);
784
785	  if (type != 0  || (daemon->options & OPT_ORDER) || !daemon->last_server)
786	    last_server = daemon->servers;
787	  else
788	    last_server = daemon->last_server;
789
790	  if (!flags && last_server)
791	    {
792	      struct server *firstsendto = NULL;
793	      unsigned int crc = questions_crc(header, (unsigned int)size, daemon->namebuff);
794
795	      /* Loop round available servers until we succeed in connecting to one.
796	         Note that this code subtley ensures that consecutive queries on this connection
797	         which can go to the same server, do so. */
798	      while (1)
799		{
800		  if (!firstsendto)
801		    firstsendto = last_server;
802		  else
803		    {
804		      if (!(last_server = last_server->next))
805			last_server = daemon->servers;
806
807		      if (last_server == firstsendto)
808			break;
809		    }
810
811		  /* server for wrong domain */
812		  if (type != (last_server->flags & SERV_TYPE) ||
813		      (type == SERV_HAS_DOMAIN && !hostname_isequal(domain, last_server->domain)))
814		    continue;
815
816		  if ((last_server->tcpfd == -1) &&
817		      (last_server->tcpfd = socket(last_server->addr.sa.sa_family, SOCK_STREAM, 0)) != -1 &&
818		      (!local_bind(last_server->tcpfd,  &last_server->source_addr, last_server->interface, 1) ||
819		       connect(last_server->tcpfd, &last_server->addr.sa, sa_len(&last_server->addr)) == -1))
820		    {
821		      close(last_server->tcpfd);
822		      last_server->tcpfd = -1;
823		    }
824
825		  if (last_server->tcpfd == -1)
826		    continue;
827
828		  c1 = size >> 8;
829		  c2 = size;
830
831		  if (!read_write(last_server->tcpfd, &c1, 1, 0) ||
832		      !read_write(last_server->tcpfd, &c2, 1, 0) ||
833		      !read_write(last_server->tcpfd, packet, size, 0) ||
834		      !read_write(last_server->tcpfd, &c1, 1, 1) ||
835		      !read_write(last_server->tcpfd, &c2, 1, 1))
836		    {
837		      close(last_server->tcpfd);
838		      last_server->tcpfd = -1;
839		      continue;
840		    }
841
842		  m = (c1 << 8) | c2;
843		  if (!read_write(last_server->tcpfd, packet, m, 1))
844		    return packet;
845
846		  if (!gotname)
847		    strcpy(daemon->namebuff, "query");
848		  if (last_server->addr.sa.sa_family == AF_INET)
849		    log_query(F_SERVER | F_IPV4 | F_FORWARD, daemon->namebuff,
850			      (struct all_addr *)&last_server->addr.in.sin_addr, NULL);
851#ifdef HAVE_IPV6
852		  else
853		    log_query(F_SERVER | F_IPV6 | F_FORWARD, daemon->namebuff,
854			      (struct all_addr *)&last_server->addr.in6.sin6_addr, NULL);
855#endif
856
857		  /* There's no point in updating the cache, since this process will exit and
858		     lose the information after a few queries. We make this call for the alias and
859		     bogus-nxdomain side-effects. */
860		  /* If the crc of the question section doesn't match the crc we sent, then
861		     someone might be attempting to insert bogus values into the cache by
862		     sending replies containing questions and bogus answers. */
863		  if (crc == questions_crc(header, (unsigned int)m, daemon->namebuff))
864		    m = process_reply(header, now, last_server, (unsigned int)m);
865
866		  break;
867		}
868	    }
869
870	  /* In case of local answer or no connections made. */
871	  if (m == 0)
872	    m = setup_reply(header, (unsigned int)size, addrp, flags, daemon->local_ttl);
873	}
874
875      check_log_writer(NULL);
876
877      c1 = m>>8;
878      c2 = m;
879      if (!read_write(confd, &c1, 1, 0) ||
880	  !read_write(confd, &c2, 1, 0) ||
881	  !read_write(confd, packet, m, 0))
882	return packet;
883    }
884}
885
886static struct frec *allocate_frec(time_t now)
887{
888  struct frec *f;
889
890  if ((f = (struct frec *)whine_malloc(sizeof(struct frec))))
891    {
892      f->next = daemon->frec_list;
893      f->time = now;
894      f->sentto = NULL;
895      f->rfd4 = NULL;
896#ifdef HAVE_IPV6
897      f->rfd6 = NULL;
898#endif
899      daemon->frec_list = f;
900    }
901
902  return f;
903}
904
905static struct randfd *allocate_rfd(int family)
906{
907  static int finger = 0;
908  int i;
909
910  /* limit the number of sockets we have open to avoid starvation of
911     (eg) TFTP. Once we have a reasonable number, randomness should be OK */
912
913  for (i = 0; i < RANDOM_SOCKS; i++)
914    if (daemon->randomsocks[i].refcount == 0)
915      {
916	if ((daemon->randomsocks[i].fd = random_sock(family)) == -1)
917	  break;
918
919	daemon->randomsocks[i].refcount = 1;
920	daemon->randomsocks[i].family = family;
921	return &daemon->randomsocks[i];
922      }
923
924  /* No free ones or cannot get new socket, grab an existing one */
925  for (i = 0; i < RANDOM_SOCKS; i++)
926    {
927      int j = (i+finger) % RANDOM_SOCKS;
928      if (daemon->randomsocks[j].refcount != 0 &&
929	  daemon->randomsocks[j].family == family &&
930	  daemon->randomsocks[j].refcount != 0xffff)
931	{
932	  finger = j;
933	  daemon->randomsocks[j].refcount++;
934	  return &daemon->randomsocks[j];
935	}
936    }
937
938  return NULL; /* doom */
939}
940
941static void free_frec(struct frec *f)
942{
943  if (f->rfd4 && --(f->rfd4->refcount) == 0)
944    close(f->rfd4->fd);
945
946  f->rfd4 = NULL;
947  f->sentto = NULL;
948
949#ifdef HAVE_IPV6
950  if (f->rfd6 && --(f->rfd6->refcount) == 0)
951    close(f->rfd6->fd);
952
953  f->rfd6 = NULL;
954#endif
955}
956
957/* if wait==NULL return a free or older than TIMEOUT record.
958   else return *wait zero if one available, or *wait is delay to
959   when the oldest in-use record will expire. Impose an absolute
960   limit of 4*TIMEOUT before we wipe things (for random sockets) */
961struct frec *get_new_frec(time_t now, int *wait)
962{
963  struct frec *f, *oldest, *target;
964  int count;
965
966  if (wait)
967    *wait = 0;
968
969  for (f = daemon->frec_list, oldest = NULL, target =  NULL, count = 0; f; f = f->next, count++)
970    if (!f->sentto)
971      target = f;
972    else
973      {
974	if (difftime(now, f->time) >= 4*TIMEOUT)
975	  {
976	    free_frec(f);
977	    target = f;
978	  }
979
980	if (!oldest || difftime(f->time, oldest->time) <= 0)
981	  oldest = f;
982      }
983
984  if (target)
985    {
986      target->time = now;
987      return target;
988    }
989
990  /* can't find empty one, use oldest if there is one
991     and it's older than timeout */
992  if (oldest && ((int)difftime(now, oldest->time)) >= TIMEOUT)
993    {
994      /* keep stuff for twice timeout if we can by allocating a new
995	 record instead */
996      if (difftime(now, oldest->time) < 2*TIMEOUT &&
997	  count <= daemon->ftabsize &&
998	  (f = allocate_frec(now)))
999	return f;
1000
1001      if (!wait)
1002	{
1003	  free_frec(oldest);
1004	  oldest->time = now;
1005	}
1006      return oldest;
1007    }
1008
1009  /* none available, calculate time 'till oldest record expires */
1010  if (count > daemon->ftabsize)
1011    {
1012      if (oldest && wait)
1013	*wait = oldest->time + (time_t)TIMEOUT - now;
1014      return NULL;
1015    }
1016
1017  if (!(f = allocate_frec(now)) && wait)
1018    /* wait one second on malloc failure */
1019    *wait = 1;
1020
1021  return f; /* OK if malloc fails and this is NULL */
1022}
1023
1024/* crc is all-ones if not known. */
1025static struct frec *lookup_frec(unsigned short id, unsigned int crc)
1026{
1027  struct frec *f;
1028
1029  for(f = daemon->frec_list; f; f = f->next)
1030    if (f->sentto && f->new_id == id &&
1031	(f->crc == crc || crc == 0xffffffff))
1032      return f;
1033
1034  return NULL;
1035}
1036
1037static struct frec *lookup_frec_by_sender(unsigned short id,
1038					  union mysockaddr *addr,
1039					  unsigned int crc)
1040{
1041  struct frec *f;
1042
1043  for(f = daemon->frec_list; f; f = f->next)
1044    if (f->sentto &&
1045	f->orig_id == id &&
1046	f->crc == crc &&
1047	sockaddr_isequal(&f->source, addr))
1048      return f;
1049
1050  return NULL;
1051}
1052
1053/* A server record is going away, remove references to it */
1054void server_gone(struct server *server)
1055{
1056  struct frec *f;
1057
1058  for (f = daemon->frec_list; f; f = f->next)
1059    if (f->sentto && f->sentto == server)
1060      free_frec(f);
1061
1062  if (daemon->last_server == server)
1063    daemon->last_server = NULL;
1064
1065  if (daemon->srv_save == server)
1066    daemon->srv_save = NULL;
1067}
1068
/* return unique random ids.
   For signed packets we can't change the ID without breaking the
   signing, so we keep the same one. In this case force is set, and this
   routine degenerates into killing any conflicting forward record.

   force     non-zero to use force_id unchanged
   force_id  the id to keep when force is set
   crc       question crc, used when looking for clashing records */
static unsigned short get_id(int force, unsigned short force_id, unsigned int crc)
{
  unsigned short candidate;
  struct frec *clash;

  if (!force)
    {
      /* keep drawing until no in-use record has the same id and crc */
      for (;;)
	{
	  candidate = rand16();
	  if (!lookup_frec(candidate, crc))
	    return candidate;
	}
    }

  clash = lookup_frec(force_id, crc);
  if (clash)
    free_frec(clash); /* free */

  return force_id;
}
1090
1091
1092
1093
1094
1095