1/*
2 * Copyright (c) 1990, 1991, 1992, 1993, 1994, 1995, 1996
3 *	The Regents of the University of California.  All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that: (1) source code distributions
7 * retain the above copyright notice and this paragraph in its entirety, (2)
8 * distributions including binary code include the above copyright notice and
9 * this paragraph in its entirety in the documentation or other materials
10 * provided with the distribution, and (3) all advertising materials mentioning
11 * features or use of this software display the following acknowledgement:
12 * ``This product includes software developed by the University of California,
13 * Lawrence Berkeley Laboratory and its contributors.'' Neither the name of
14 * the University nor the names of its contributors may be used to endorse
15 * or promote products derived from this software without specific prior
16 * written permission.
17 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED
18 * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
19 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
20 *
21 * packet filter subroutines for tcpdump
22 *	Extraction/creation by Jeffrey Mogul, DECWRL
23 */
24
25#ifdef HAVE_CONFIG_H
26#include "config.h"
27#endif
28
29#include <sys/types.h>
30#include <sys/time.h>
31#include <sys/timeb.h>
32#include <sys/socket.h>
33#include <sys/file.h>
34#include <sys/ioctl.h>
35#include <net/pfilt.h>
36
37struct mbuf;
38struct rtentry;
39#include <net/if.h>
40
41#include <netinet/in.h>
42#include <netinet/in_systm.h>
43#include <netinet/ip.h>
44#include <netinet/if_ether.h>
45#include <netinet/ip_var.h>
46#include <netinet/udp.h>
47#include <netinet/udp_var.h>
48#include <netinet/tcp.h>
49#include <netinet/tcpip.h>
50
51#include <ctype.h>
52#include <errno.h>
53#include <netdb.h>
54#include <stdio.h>
55#include <stdlib.h>
56#include <string.h>
57#include <unistd.h>
58
59/*
60 * Make "pcap.h" not include "pcap/bpf.h"; we are going to include the
61 * native OS version, as we need various BPF ioctls from it.
62 */
63#define PCAP_DONT_INCLUDE_PCAP_BPF_H
64#include <net/bpf.h>
65
66#include "pcap-int.h"
67
68#ifdef HAVE_OS_PROTO_H
69#include "os-proto.h"
70#endif
71
/*
 * FDDI packets are padded to make everything line up on a nice boundary.
 *
 * This is the number of pad bytes in front of each FDDI frame.
 * pcap_activate_pf() stores it in the handle's fddipad field and adds it
 * to the snapshot length; pcap_read_pf() skips that many bytes before
 * handing a packet to the callback.
 */
#define       PCAP_FDDIPAD 3
76
/*
 * Private data for capturing on Ultrix and DEC OSF/1^WDigital UNIX^W^W
 * Tru64 UNIX packetfilter devices.
 *
 * The counters are maintained by pcap_read_pf() and reported by
 * pcap_stats_pf().
 */
struct pcap_pf {
	int	filtering_in_kernel; /* set to 1 by pcap_setfilter_pf() when the kernel runs the BPF filter, 0 when filtering is done in userland */
	u_long	TotPkts;	/* records seen by pcap_read_pf(); can't oflow for 79 hrs on ether */
	u_long	TotAccepted;	/* count accepted by filter and passed to the callback */
	u_long	TotDrops;	/* running sum of each record's ens_dropped */
	long	TotMissed;	/* latest ens_ifoverflows: missed by i/f during this run */
	long	OrigMissed;	/* missed by i/f before this run; -1 until the first record is read */
};
89
/*
 * Forward declaration: pcap_activate_pf() stores this function in the
 * handle's setfilter_op, but it is defined further down in the file.
 */
static int pcap_setfilter_pf(pcap_t *, struct bpf_program *);
91
/*
 * BUFSPACE is the size in bytes of the packet read buffer.  Most tcpdump
 * applications aren't going to need more than 200 bytes of packet header
 * and the read shouldn't return more packets than packetfilter's internal
 * queue limit (bounded at 256).
 *
 * pcap_activate_pf() uses this value as the handle's bufsize; the buffer
 * it allocates is this many bytes plus the handle's alignment offset.
 */
#define BUFSPACE (200 * 256)
99
100static int
101pcap_read_pf(pcap_t *pc, int cnt, pcap_handler callback, u_char *user)
102{
103	struct pcap_pf *pf = pc->priv;
104	register u_char *p, *bp;
105	register int cc, n, buflen, inc;
106	register struct enstamp *sp;
107#ifdef LBL_ALIGN
108	struct enstamp stamp;
109#endif
110	register u_int pad;
111
112 again:
113	cc = pc->cc;
114	if (cc == 0) {
115		cc = read(pc->fd, (char *)pc->buffer + pc->offset, pc->bufsize);
116		if (cc < 0) {
117			if (errno == EWOULDBLOCK)
118				return (0);
119			if (errno == EINVAL &&
120			    lseek(pc->fd, 0L, SEEK_CUR) + pc->bufsize < 0) {
121				/*
122				 * Due to a kernel bug, after 2^31 bytes,
123				 * the kernel file offset overflows and
124				 * read fails with EINVAL. The lseek()
125				 * to 0 will fix things.
126				 */
127				(void)lseek(pc->fd, 0L, SEEK_SET);
128				goto again;
129			}
130			pcap_snprintf(pc->errbuf, sizeof(pc->errbuf), "pf read: %s",
131				pcap_strerror(errno));
132			return (-1);
133		}
134		bp = (u_char *)pc->buffer + pc->offset;
135	} else
136		bp = pc->bp;
137	/*
138	 * Loop through each packet.
139	 */
140	n = 0;
141	pad = pc->fddipad;
142	while (cc > 0) {
143		/*
144		 * Has "pcap_breakloop()" been called?
145		 * If so, return immediately - if we haven't read any
146		 * packets, clear the flag and return -2 to indicate
147		 * that we were told to break out of the loop, otherwise
148		 * leave the flag set, so that the *next* call will break
149		 * out of the loop without having read any packets, and
150		 * return the number of packets we've processed so far.
151		 */
152		if (pc->break_loop) {
153			if (n == 0) {
154				pc->break_loop = 0;
155				return (-2);
156			} else {
157				pc->cc = cc;
158				pc->bp = bp;
159				return (n);
160			}
161		}
162		if (cc < sizeof(*sp)) {
163			pcap_snprintf(pc->errbuf, sizeof(pc->errbuf),
164			    "pf short read (%d)", cc);
165			return (-1);
166		}
167#ifdef LBL_ALIGN
168		if ((long)bp & 3) {
169			sp = &stamp;
170			memcpy((char *)sp, (char *)bp, sizeof(*sp));
171		} else
172#endif
173			sp = (struct enstamp *)bp;
174		if (sp->ens_stamplen != sizeof(*sp)) {
175			pcap_snprintf(pc->errbuf, sizeof(pc->errbuf),
176			    "pf short stamplen (%d)",
177			    sp->ens_stamplen);
178			return (-1);
179		}
180
181		p = bp + sp->ens_stamplen;
182		buflen = sp->ens_count;
183		if (buflen > pc->snapshot)
184			buflen = pc->snapshot;
185
186		/* Calculate inc before possible pad update */
187		inc = ENALIGN(buflen + sp->ens_stamplen);
188		cc -= inc;
189		bp += inc;
190		pf->TotPkts++;
191		pf->TotDrops += sp->ens_dropped;
192		pf->TotMissed = sp->ens_ifoverflows;
193		if (pf->OrigMissed < 0)
194			pf->OrigMissed = pf->TotMissed;
195
196		/*
197		 * Short-circuit evaluation: if using BPF filter
198		 * in kernel, no need to do it now - we already know
199		 * the packet passed the filter.
200		 *
201		 * Note: the filter code was generated assuming
202		 * that pc->fddipad was the amount of padding
203		 * before the header, as that's what's required
204		 * in the kernel, so we run the filter before
205		 * skipping that padding.
206		 */
207		if (pf->filtering_in_kernel ||
208		    bpf_filter(pc->fcode.bf_insns, p, sp->ens_count, buflen)) {
209			struct pcap_pkthdr h;
210			pf->TotAccepted++;
211			h.ts = sp->ens_tstamp;
212			h.len = sp->ens_count - pad;
213			p += pad;
214			buflen -= pad;
215			h.caplen = buflen;
216			(*callback)(user, &h, p);
217			if (++n >= cnt && !PACKET_COUNT_IS_UNLIMITED(cnt)) {
218				pc->cc = cc;
219				pc->bp = bp;
220				return (n);
221			}
222		}
223	}
224	pc->cc = 0;
225	return (n);
226}
227
228static int
229pcap_inject_pf(pcap_t *p, const void *buf, size_t size)
230{
231	int ret;
232
233	ret = write(p->fd, buf, size);
234	if (ret == -1) {
235		pcap_snprintf(p->errbuf, PCAP_ERRBUF_SIZE, "send: %s",
236		    pcap_strerror(errno));
237		return (-1);
238	}
239	return (ret);
240}
241
242static int
243pcap_stats_pf(pcap_t *p, struct pcap_stat *ps)
244{
245	struct pcap_pf *pf = p->priv;
246
247	/*
248	 * If packet filtering is being done in the kernel:
249	 *
250	 *	"ps_recv" counts only packets that passed the filter.
251	 *	This does not include packets dropped because we
252	 *	ran out of buffer space.  (XXX - perhaps it should,
253	 *	by adding "ps_drop" to "ps_recv", for compatibility
254	 *	with some other platforms.  On the other hand, on
255	 *	some platforms "ps_recv" counts only packets that
256	 *	passed the filter, and on others it counts packets
257	 *	that didn't pass the filter....)
258	 *
259	 *	"ps_drop" counts packets that passed the kernel filter
260	 *	(if any) but were dropped because the input queue was
261	 *	full.
262	 *
263	 *	"ps_ifdrop" counts packets dropped by the network
264	 *	inteface (regardless of whether they would have passed
265	 *	the input filter, of course).
266	 *
267	 * If packet filtering is not being done in the kernel:
268	 *
269	 *	"ps_recv" counts only packets that passed the filter.
270	 *
271	 *	"ps_drop" counts packets that were dropped because the
272	 *	input queue was full, regardless of whether they passed
273	 *	the userland filter.
274	 *
275	 *	"ps_ifdrop" counts packets dropped by the network
276	 *	inteface (regardless of whether they would have passed
277	 *	the input filter, of course).
278	 *
279	 * These statistics don't include packets not yet read from
280	 * the kernel by libpcap, but they may include packets not
281	 * yet read from libpcap by the application.
282	 */
283	ps->ps_recv = pf->TotAccepted;
284	ps->ps_drop = pf->TotDrops;
285	ps->ps_ifdrop = pf->TotMissed - pf->OrigMissed;
286	return (0);
287}
288
/*
 * We include the OS's <net/bpf.h>, not our "pcap/bpf.h", so we probably
 * don't get DLT_DOCSIS defined.
 *
 * Supply a fallback definition; pcap_activate_pf() needs the value for
 * the link-layer-type list it offers on Ethernet devices.
 */
#ifndef DLT_DOCSIS
#define DLT_DOCSIS	143
#endif
296
297static int
298pcap_activate_pf(pcap_t *p)
299{
300	struct pcap_pf *pf = p->priv;
301	short enmode;
302	int backlog = -1;	/* request the most */
303	struct enfilter Filter;
304	struct endevp devparams;
305
306	/*
307	 * Initially try a read/write open (to allow the inject
308	 * method to work).  If that fails due to permission
309	 * issues, fall back to read-only.  This allows a
310	 * non-root user to be granted specific access to pcap
311	 * capabilities via file permissions.
312	 *
313	 * XXX - we should have an API that has a flag that
314	 * controls whether to open read-only or read-write,
315	 * so that denial of permission to send (or inability
316	 * to send, if sending packets isn't supported on
317	 * the device in question) can be indicated at open
318	 * time.
319	 *
320	 * XXX - we assume here that "pfopen()" does not, in fact, modify
321	 * its argument, even though it takes a "char *" rather than a
322	 * "const char *" as its first argument.  That appears to be
323	 * the case, at least on Digital UNIX 4.0.
324	 *
325	 * XXX - is there an error that means "no such device"?  Is
326	 * there one that means "that device doesn't support pf"?
327	 */
328	p->fd = pfopen(p->opt.device, O_RDWR);
329	if (p->fd == -1 && errno == EACCES)
330		p->fd = pfopen(p->opt.device, O_RDONLY);
331	if (p->fd < 0) {
332		pcap_snprintf(p->errbuf, PCAP_ERRBUF_SIZE, "pf open: %s: %s\n\
333your system may not be properly configured; see the packetfilter(4) man page\n",
334			p->opt.device, pcap_strerror(errno));
335		goto bad;
336	}
337	pf->OrigMissed = -1;
338	enmode = ENTSTAMP|ENNONEXCL;
339	if (!p->opt.immediate)
340		enmode |= ENBATCH;
341	if (p->opt.promisc)
342		enmode |= ENPROMISC;
343	if (ioctl(p->fd, EIOCMBIS, (caddr_t)&enmode) < 0) {
344		pcap_snprintf(p->errbuf, PCAP_ERRBUF_SIZE, "EIOCMBIS: %s",
345		    pcap_strerror(errno));
346		goto bad;
347	}
348#ifdef	ENCOPYALL
349	/* Try to set COPYALL mode so that we see packets to ourself */
350	enmode = ENCOPYALL;
351	(void)ioctl(p->fd, EIOCMBIS, (caddr_t)&enmode);/* OK if this fails */
352#endif
353	/* set the backlog */
354	if (ioctl(p->fd, EIOCSETW, (caddr_t)&backlog) < 0) {
355		pcap_snprintf(p->errbuf, PCAP_ERRBUF_SIZE, "EIOCSETW: %s",
356		    pcap_strerror(errno));
357		goto bad;
358	}
359	/* discover interface type */
360	if (ioctl(p->fd, EIOCDEVP, (caddr_t)&devparams) < 0) {
361		pcap_snprintf(p->errbuf, PCAP_ERRBUF_SIZE, "EIOCDEVP: %s",
362		    pcap_strerror(errno));
363		goto bad;
364	}
365	/* HACK: to compile prior to Ultrix 4.2 */
366#ifndef	ENDT_FDDI
367#define	ENDT_FDDI	4
368#endif
369	switch (devparams.end_dev_type) {
370
371	case ENDT_10MB:
372		p->linktype = DLT_EN10MB;
373		p->offset = 2;
374		/*
375		 * This is (presumably) a real Ethernet capture; give it a
376		 * link-layer-type list with DLT_EN10MB and DLT_DOCSIS, so
377		 * that an application can let you choose it, in case you're
378		 * capturing DOCSIS traffic that a Cisco Cable Modem
379		 * Termination System is putting out onto an Ethernet (it
380		 * doesn't put an Ethernet header onto the wire, it puts raw
381		 * DOCSIS frames out on the wire inside the low-level
382		 * Ethernet framing).
383		 */
384		p->dlt_list = (u_int *) malloc(sizeof(u_int) * 2);
385		/*
386		 * If that fails, just leave the list empty.
387		 */
388		if (p->dlt_list != NULL) {
389			p->dlt_list[0] = DLT_EN10MB;
390			p->dlt_list[1] = DLT_DOCSIS;
391			p->dlt_count = 2;
392		}
393		break;
394
395	case ENDT_FDDI:
396		p->linktype = DLT_FDDI;
397		break;
398
399#ifdef ENDT_SLIP
400	case ENDT_SLIP:
401		p->linktype = DLT_SLIP;
402		break;
403#endif
404
405#ifdef ENDT_PPP
406	case ENDT_PPP:
407		p->linktype = DLT_PPP;
408		break;
409#endif
410
411#ifdef ENDT_LOOPBACK
412	case ENDT_LOOPBACK:
413		/*
414		 * It appears to use Ethernet framing, at least on
415		 * Digital UNIX 4.0.
416		 */
417		p->linktype = DLT_EN10MB;
418		p->offset = 2;
419		break;
420#endif
421
422#ifdef ENDT_TRN
423	case ENDT_TRN:
424		p->linktype = DLT_IEEE802;
425		break;
426#endif
427
428	default:
429		/*
430		 * XXX - what about ENDT_IEEE802?  The pfilt.h header
431		 * file calls this "IEEE 802 networks (non-Ethernet)",
432		 * but that doesn't specify a specific link layer type;
433		 * it could be 802.4, or 802.5 (except that 802.5 is
434		 * ENDT_TRN), or 802.6, or 802.11, or....  That's why
435		 * DLT_IEEE802 was hijacked to mean Token Ring in various
436		 * BSDs, and why we went along with that hijacking.
437		 *
438		 * XXX - what about ENDT_HDLC and ENDT_NULL?
439		 * Presumably, as ENDT_OTHER is just "Miscellaneous
440		 * framing", there's not much we can do, as that
441		 * doesn't specify a particular type of header.
442		 */
443		pcap_snprintf(p->errbuf, PCAP_ERRBUF_SIZE,
444		    "unknown data-link type %u", devparams.end_dev_type);
445		goto bad;
446	}
447	/* set truncation */
448	if (p->linktype == DLT_FDDI) {
449		p->fddipad = PCAP_FDDIPAD;
450
451		/* packetfilter includes the padding in the snapshot */
452		p->snapshot += PCAP_FDDIPAD;
453	} else
454		p->fddipad = 0;
455	if (ioctl(p->fd, EIOCTRUNCATE, (caddr_t)&p->snapshot) < 0) {
456		pcap_snprintf(p->errbuf, PCAP_ERRBUF_SIZE, "EIOCTRUNCATE: %s",
457		    pcap_strerror(errno));
458		goto bad;
459	}
460	/* accept all packets */
461	memset(&Filter, 0, sizeof(Filter));
462	Filter.enf_Priority = 37;	/* anything > 2 */
463	Filter.enf_FilterLen = 0;	/* means "always true" */
464	if (ioctl(p->fd, EIOCSETF, (caddr_t)&Filter) < 0) {
465		pcap_snprintf(p->errbuf, PCAP_ERRBUF_SIZE, "EIOCSETF: %s",
466		    pcap_strerror(errno));
467		goto bad;
468	}
469
470	if (p->opt.timeout != 0) {
471		struct timeval timeout;
472		timeout.tv_sec = p->opt.timeout / 1000;
473		timeout.tv_usec = (p->opt.timeout * 1000) % 1000000;
474		if (ioctl(p->fd, EIOCSRTIMEOUT, (caddr_t)&timeout) < 0) {
475			pcap_snprintf(p->errbuf, PCAP_ERRBUF_SIZE, "EIOCSRTIMEOUT: %s",
476				pcap_strerror(errno));
477			goto bad;
478		}
479	}
480
481	p->bufsize = BUFSPACE;
482	p->buffer = malloc(p->bufsize + p->offset);
483	if (p->buffer == NULL) {
484		strlcpy(p->errbuf, pcap_strerror(errno), PCAP_ERRBUF_SIZE);
485		goto bad;
486	}
487
488	/*
489	 * "select()" and "poll()" work on packetfilter devices.
490	 */
491	p->selectable_fd = p->fd;
492
493	p->read_op = pcap_read_pf;
494	p->inject_op = pcap_inject_pf;
495	p->setfilter_op = pcap_setfilter_pf;
496	p->setdirection_op = NULL;	/* Not implemented. */
497	p->set_datalink_op = NULL;	/* can't change data link type */
498	p->getnonblock_op = pcap_getnonblock_fd;
499	p->setnonblock_op = pcap_setnonblock_fd;
500	p->stats_op = pcap_stats_pf;
501
502	return (0);
503 bad:
504	pcap_cleanup_live_common(p);
505	return (PCAP_ERROR);
506}
507
508pcap_t *
509pcap_create_interface(const char *device _U_, char *ebuf)
510{
511	pcap_t *p;
512
513	p = pcap_create_common(ebuf, sizeof (struct pcap_pf));
514	if (p == NULL)
515		return (NULL);
516
517	p->activate_op = pcap_activate_pf;
518	return (p);
519}
520
521/*
522 * XXX - is there an error from pfopen() that means "no such device"?
523 * Is there one that means "that device doesn't support pf"?
524 */
525static int
526can_be_bound(const char *name _U_)
527{
528	return (1);
529}
530
531int
532pcap_platform_finddevs(pcap_if_t **alldevsp, char *errbuf)
533{
534	return (pcap_findalldevs_interfaces(alldevsp, errbuf, can_be_bound));
535}
536
537static int
538pcap_setfilter_pf(pcap_t *p, struct bpf_program *fp)
539{
540	struct pcap_pf *pf = p->priv;
541	struct bpf_version bv;
542
543	/*
544	 * See if BIOCVERSION works.  If not, we assume the kernel doesn't
545	 * support BPF-style filters (it's not documented in the bpf(7)
546	 * or packetfiler(7) man pages, but the code used to fail if
547	 * BIOCSETF worked but BIOCVERSION didn't, and I've seen it do
548	 * kernel filtering in DU 4.0, so presumably BIOCVERSION works
549	 * there, at least).
550	 */
551	if (ioctl(p->fd, BIOCVERSION, (caddr_t)&bv) >= 0) {
552		/*
553		 * OK, we have the version of the BPF interpreter;
554		 * is it the same major version as us, and the same
555		 * or better minor version?
556		 */
557		if (bv.bv_major == BPF_MAJOR_VERSION &&
558		    bv.bv_minor >= BPF_MINOR_VERSION) {
559			/*
560			 * Yes.  Try to install the filter.
561			 */
562			if (ioctl(p->fd, BIOCSETF, (caddr_t)fp) < 0) {
563				pcap_snprintf(p->errbuf, sizeof(p->errbuf),
564				    "BIOCSETF: %s", pcap_strerror(errno));
565				return (-1);
566			}
567
568			/*
569			 * OK, that succeeded.  We're doing filtering in
570			 * the kernel.  (We assume we don't have a
571			 * userland filter installed - that'd require
572			 * a previous version check to have failed but
573			 * this one to succeed.)
574			 *
575			 * XXX - this message should be supplied to the
576			 * application as a warning of some sort,
577			 * except that if it's a GUI application, it's
578			 * not clear that it should be displayed in
579			 * a window to annoy the user.
580			 */
581			fprintf(stderr, "tcpdump: Using kernel BPF filter\n");
582			pf->filtering_in_kernel = 1;
583
584			/*
585			 * Discard any previously-received packets,
586			 * as they might have passed whatever filter
587			 * was formerly in effect, but might not pass
588			 * this filter (BIOCSETF discards packets buffered
589			 * in the kernel, so you can lose packets in any
590			 * case).
591			 */
592			p->cc = 0;
593			return (0);
594		}
595
596		/*
597		 * We can't use the kernel's BPF interpreter; don't give
598		 * up, just log a message and be inefficient.
599		 *
600		 * XXX - this should really be supplied to the application
601		 * as a warning of some sort.
602		 */
603		fprintf(stderr,
604	    "tcpdump: Requires BPF language %d.%d or higher; kernel is %d.%d\n",
605		    BPF_MAJOR_VERSION, BPF_MINOR_VERSION,
606		    bv.bv_major, bv.bv_minor);
607	}
608
609	/*
610	 * We couldn't do filtering in the kernel; do it in userland.
611	 */
612	if (install_bpf_program(p, fp) < 0)
613		return (-1);
614
615	/*
616	 * XXX - this message should be supplied by the application as
617	 * a warning of some sort.
618	 */
619	fprintf(stderr, "tcpdump: Filtering in user process\n");
620	pf->filtering_in_kernel = 0;
621	return (0);
622}
623