1#include <string.h>
2#include <stdint.h>
3#include <stdlib.h>
4#include <stdio.h>
5#include <errno.h>
6#include <byteswap.h>
7#include <gpxe/list.h>
8#include <gpxe/in.h>
9#include <gpxe/arp.h>
10#include <gpxe/if_ether.h>
11#include <gpxe/iobuf.h>
12#include <gpxe/netdevice.h>
13#include <gpxe/ip.h>
14#include <gpxe/tcpip.h>
15#include <gpxe/dhcp.h>
16#include <gpxe/settings.h>
17
18/** @file
19 *
20 * IPv4 protocol
21 *
22 */
23
24FILE_LICENCE ( GPL2_OR_LATER );
25
/* Unique IP datagram identification number
 *
 * Pre-incremented by ipv4_tx() for every datagram it builds, so the
 * first datagram transmitted carries identification 1.
 */
static uint16_t next_ident = 0;

/* Tentative definition: the initialised definition appears further
 * down in this file, but ipv4_ll_addr() and ipv4_tx() need to refer
 * to the protocol before that point.
 */
struct net_protocol ipv4_protocol;

/** List of IPv4 miniroutes
 *
 * add_ipv4_miniroute() places routes without a gateway at the head
 * and routes with a gateway at the tail; ipv4_route() walks the list
 * in order and uses the first usable entry.
 */
struct list_head ipv4_miniroutes = LIST_HEAD_INIT ( ipv4_miniroutes );

/** List of fragment reassembly buffers
 *
 * One entry per datagram currently being reassembled by
 * ipv4_reassemble().
 */
static LIST_HEAD ( frag_buffers );
36
37/**
38 * Add IPv4 minirouting table entry
39 *
40 * @v netdev		Network device
41 * @v address		IPv4 address
42 * @v netmask		Subnet mask
43 * @v gateway		Gateway address (if any)
44 * @ret miniroute	Routing table entry, or NULL
45 */
46static struct ipv4_miniroute * __malloc
47add_ipv4_miniroute ( struct net_device *netdev, struct in_addr address,
48		     struct in_addr netmask, struct in_addr gateway ) {
49	struct ipv4_miniroute *miniroute;
50
51	DBG ( "IPv4 add %s", inet_ntoa ( address ) );
52	DBG ( "/%s ", inet_ntoa ( netmask ) );
53	if ( gateway.s_addr )
54		DBG ( "gw %s ", inet_ntoa ( gateway ) );
55	DBG ( "via %s\n", netdev->name );
56
57	/* Allocate and populate miniroute structure */
58	miniroute = malloc ( sizeof ( *miniroute ) );
59	if ( ! miniroute ) {
60		DBG ( "IPv4 could not add miniroute\n" );
61		return NULL;
62	}
63
64	/* Record routing information */
65	miniroute->netdev = netdev_get ( netdev );
66	miniroute->address = address;
67	miniroute->netmask = netmask;
68	miniroute->gateway = gateway;
69
70	/* Add to end of list if we have a gateway, otherwise
71	 * to start of list.
72	 */
73	if ( gateway.s_addr ) {
74		list_add_tail ( &miniroute->list, &ipv4_miniroutes );
75	} else {
76		list_add ( &miniroute->list, &ipv4_miniroutes );
77	}
78
79	return miniroute;
80}
81
82/**
83 * Delete IPv4 minirouting table entry
84 *
85 * @v miniroute		Routing table entry
86 */
87static void del_ipv4_miniroute ( struct ipv4_miniroute *miniroute ) {
88
89	DBG ( "IPv4 del %s", inet_ntoa ( miniroute->address ) );
90	DBG ( "/%s ", inet_ntoa ( miniroute->netmask ) );
91	if ( miniroute->gateway.s_addr )
92		DBG ( "gw %s ", inet_ntoa ( miniroute->gateway ) );
93	DBG ( "via %s\n", miniroute->netdev->name );
94
95	netdev_put ( miniroute->netdev );
96	list_del ( &miniroute->list );
97	free ( miniroute );
98}
99
100/**
101 * Perform IPv4 routing
102 *
103 * @v dest		Final destination address
104 * @ret dest		Next hop destination address
105 * @ret miniroute	Routing table entry to use, or NULL if no route
106 *
107 * If the route requires use of a gateway, the next hop destination
108 * address will be overwritten with the gateway address.
109 */
110static struct ipv4_miniroute * ipv4_route ( struct in_addr *dest ) {
111	struct ipv4_miniroute *miniroute;
112	int local;
113	int has_gw;
114
115	/* Never attempt to route the broadcast address */
116	if ( dest->s_addr == INADDR_BROADCAST )
117		return NULL;
118
119	/* Find first usable route in routing table */
120	list_for_each_entry ( miniroute, &ipv4_miniroutes, list ) {
121		if ( ! ( miniroute->netdev->state & NETDEV_OPEN ) )
122			continue;
123		local = ( ( ( dest->s_addr ^ miniroute->address.s_addr )
124			    & miniroute->netmask.s_addr ) == 0 );
125		has_gw = ( miniroute->gateway.s_addr );
126		if ( local || has_gw ) {
127			if ( ! local )
128				*dest = miniroute->gateway;
129			return miniroute;
130		}
131	}
132
133	return NULL;
134}
135
136/**
137 * Fragment reassembly counter timeout
138 *
139 * @v timer	Retry timer
140 * @v over	If asserted, the timer is greater than @c MAX_TIMEOUT
141 */
142static void ipv4_frag_expired ( struct retry_timer *timer __unused,
143				int over ) {
144	if ( over ) {
145		DBG ( "Fragment reassembly timeout" );
146		/* Free the fragment buffer */
147	}
148}
149
150/**
151 * Free fragment buffer
152 *
153 * @v fragbug	Fragment buffer
154 */
155static void free_fragbuf ( struct frag_buffer *fragbuf ) {
156	free ( fragbuf );
157}
158
159/**
160 * Fragment reassembler
161 *
162 * @v iobuf		I/O buffer, fragment of the datagram
163 * @ret frag_iob	Reassembled packet, or NULL
164 */
165static struct io_buffer * ipv4_reassemble ( struct io_buffer * iobuf ) {
166	struct iphdr *iphdr = iobuf->data;
167	struct frag_buffer *fragbuf;
168
169	/**
170	 * Check if the fragment belongs to any fragment series
171	 */
172	list_for_each_entry ( fragbuf, &frag_buffers, list ) {
173		if ( fragbuf->ident == iphdr->ident &&
174		     fragbuf->src.s_addr == iphdr->src.s_addr ) {
175			/**
176			 * Check if the packet is the expected fragment
177			 *
178			 * The offset of the new packet must be equal to the
179			 * length of the data accumulated so far (the length of
180			 * the reassembled I/O buffer
181			 */
182			if ( iob_len ( fragbuf->frag_iob ) ==
183			      ( iphdr->frags & IP_MASK_OFFSET ) ) {
184				/**
185				 * Append the contents of the fragment to the
186				 * reassembled I/O buffer
187				 */
188				iob_pull ( iobuf, sizeof ( *iphdr ) );
189				memcpy ( iob_put ( fragbuf->frag_iob,
190							iob_len ( iobuf ) ),
191					 iobuf->data, iob_len ( iobuf ) );
192				free_iob ( iobuf );
193
194				/** Check if the fragment series is over */
195				if ( ! ( iphdr->frags & IP_MASK_MOREFRAGS ) ) {
196					iobuf = fragbuf->frag_iob;
197					free_fragbuf ( fragbuf );
198					return iobuf;
199				}
200
201			} else {
202				/* Discard the fragment series */
203				free_fragbuf ( fragbuf );
204				free_iob ( iobuf );
205			}
206			return NULL;
207		}
208	}
209
210	/** Check if the fragment is the first in the fragment series */
211	if ( iphdr->frags & IP_MASK_MOREFRAGS &&
212			( ( iphdr->frags & IP_MASK_OFFSET ) == 0 ) ) {
213
214		/** Create a new fragment buffer */
215		fragbuf = ( struct frag_buffer* ) malloc ( sizeof( *fragbuf ) );
216		fragbuf->ident = iphdr->ident;
217		fragbuf->src = iphdr->src;
218
219		/* Set up the reassembly I/O buffer */
220		fragbuf->frag_iob = alloc_iob ( IP_FRAG_IOB_SIZE );
221		iob_pull ( iobuf, sizeof ( *iphdr ) );
222		memcpy ( iob_put ( fragbuf->frag_iob, iob_len ( iobuf ) ),
223			 iobuf->data, iob_len ( iobuf ) );
224		free_iob ( iobuf );
225
226		/* Set the reassembly timer */
227		fragbuf->frag_timer.timeout = IP_FRAG_TIMEOUT;
228		fragbuf->frag_timer.expired = ipv4_frag_expired;
229		start_timer ( &fragbuf->frag_timer );
230
231		/* Add the fragment buffer to the list of fragment buffers */
232		list_add ( &fragbuf->list, &frag_buffers );
233	}
234
235	return NULL;
236}
237
238/**
239 * Add IPv4 pseudo-header checksum to existing checksum
240 *
241 * @v iobuf		I/O buffer
242 * @v csum		Existing checksum
243 * @ret csum		Updated checksum
244 */
245static uint16_t ipv4_pshdr_chksum ( struct io_buffer *iobuf, uint16_t csum ) {
246	struct ipv4_pseudo_header pshdr;
247	struct iphdr *iphdr = iobuf->data;
248	size_t hdrlen = ( ( iphdr->verhdrlen & IP_MASK_HLEN ) * 4 );
249
250	/* Build pseudo-header */
251	pshdr.src = iphdr->src;
252	pshdr.dest = iphdr->dest;
253	pshdr.zero_padding = 0x00;
254	pshdr.protocol = iphdr->protocol;
255	pshdr.len = htons ( iob_len ( iobuf ) - hdrlen );
256
257	/* Update the checksum value */
258	return tcpip_continue_chksum ( csum, &pshdr, sizeof ( pshdr ) );
259}
260
261/**
262 * Determine link-layer address
263 *
264 * @v dest		IPv4 destination address
265 * @v src		IPv4 source address
266 * @v netdev		Network device
267 * @v ll_dest		Link-layer destination address buffer
268 * @ret rc		Return status code
269 */
270static int ipv4_ll_addr ( struct in_addr dest, struct in_addr src,
271			  struct net_device *netdev, uint8_t *ll_dest ) {
272	struct ll_protocol *ll_protocol = netdev->ll_protocol;
273
274	if ( dest.s_addr == INADDR_BROADCAST ) {
275		/* Broadcast address */
276		memcpy ( ll_dest, netdev->ll_broadcast,
277			 ll_protocol->ll_addr_len );
278		return 0;
279	} else if ( IN_MULTICAST ( ntohl ( dest.s_addr ) ) ) {
280		return ll_protocol->mc_hash ( AF_INET, &dest, ll_dest );
281	} else {
282		/* Unicast address: resolve via ARP */
283		return arp_resolve ( netdev, &ipv4_protocol, &dest,
284				     &src, ll_dest );
285	}
286}
287
288/**
289 * Transmit IP packet
290 *
291 * @v iobuf		I/O buffer
292 * @v tcpip		Transport-layer protocol
293 * @v st_src		Source network-layer address
294 * @v st_dest		Destination network-layer address
295 * @v netdev		Network device to use if no route found, or NULL
296 * @v trans_csum	Transport-layer checksum to complete, or NULL
297 * @ret rc		Status
298 *
299 * This function expects a transport-layer segment and prepends the IP header
300 */
301static int ipv4_tx ( struct io_buffer *iobuf,
302		     struct tcpip_protocol *tcpip_protocol,
303		     struct sockaddr_tcpip *st_src,
304		     struct sockaddr_tcpip *st_dest,
305		     struct net_device *netdev,
306		     uint16_t *trans_csum ) {
307	struct iphdr *iphdr = iob_push ( iobuf, sizeof ( *iphdr ) );
308	struct sockaddr_in *sin_src = ( ( struct sockaddr_in * ) st_src );
309	struct sockaddr_in *sin_dest = ( ( struct sockaddr_in * ) st_dest );
310	struct ipv4_miniroute *miniroute;
311	struct in_addr next_hop;
312	uint8_t ll_dest[MAX_LL_ADDR_LEN];
313	int rc;
314
315	/* Fill up the IP header, except source address */
316	memset ( iphdr, 0, sizeof ( *iphdr ) );
317	iphdr->verhdrlen = ( IP_VER | ( sizeof ( *iphdr ) / 4 ) );
318	iphdr->service = IP_TOS;
319	iphdr->len = htons ( iob_len ( iobuf ) );
320	iphdr->ident = htons ( ++next_ident );
321	iphdr->ttl = IP_TTL;
322	iphdr->protocol = tcpip_protocol->tcpip_proto;
323	iphdr->dest = sin_dest->sin_addr;
324
325	/* Use routing table to identify next hop and transmitting netdev */
326	next_hop = iphdr->dest;
327	if ( sin_src )
328		iphdr->src = sin_src->sin_addr;
329	if ( ( next_hop.s_addr != INADDR_BROADCAST ) &&
330	     ( ! IN_MULTICAST ( ntohl ( next_hop.s_addr ) ) ) &&
331	     ( ( miniroute = ipv4_route ( &next_hop ) ) != NULL ) ) {
332		iphdr->src = miniroute->address;
333		netdev = miniroute->netdev;
334	}
335	if ( ! netdev ) {
336		DBG ( "IPv4 has no route to %s\n", inet_ntoa ( iphdr->dest ) );
337		rc = -ENETUNREACH;
338		goto err;
339	}
340
341	/* Determine link-layer destination address */
342	if ( ( rc = ipv4_ll_addr ( next_hop, iphdr->src, netdev,
343				   ll_dest ) ) != 0 ) {
344		DBG ( "IPv4 has no link-layer address for %s: %s\n",
345		      inet_ntoa ( next_hop ), strerror ( rc ) );
346		goto err;
347	}
348
349	/* Fix up checksums */
350	if ( trans_csum )
351		*trans_csum = ipv4_pshdr_chksum ( iobuf, *trans_csum );
352	iphdr->chksum = tcpip_chksum ( iphdr, sizeof ( *iphdr ) );
353
354	/* Print IP4 header for debugging */
355	DBG ( "IPv4 TX %s->", inet_ntoa ( iphdr->src ) );
356	DBG ( "%s len %d proto %d id %04x csum %04x\n",
357	      inet_ntoa ( iphdr->dest ), ntohs ( iphdr->len ), iphdr->protocol,
358	      ntohs ( iphdr->ident ), ntohs ( iphdr->chksum ) );
359
360	/* Hand off to link layer */
361	if ( ( rc = net_tx ( iobuf, netdev, &ipv4_protocol, ll_dest ) ) != 0 ) {
362		DBG ( "IPv4 could not transmit packet via %s: %s\n",
363		      netdev->name, strerror ( rc ) );
364		return rc;
365	}
366
367	return 0;
368
369 err:
370	free_iob ( iobuf );
371	return rc;
372}
373
374/**
375 * Process incoming packets
376 *
377 * @v iobuf	I/O buffer
378 * @v netdev	Network device
379 * @v ll_source	Link-layer destination source
380 *
381 * This function expects an IP4 network datagram. It processes the headers
382 * and sends it to the transport layer.
383 */
384static int ipv4_rx ( struct io_buffer *iobuf, struct net_device *netdev __unused,
385		     const void *ll_source __unused ) {
386	struct iphdr *iphdr = iobuf->data;
387	size_t hdrlen;
388	size_t len;
389	union {
390		struct sockaddr_in sin;
391		struct sockaddr_tcpip st;
392	} src, dest;
393	uint16_t csum;
394	uint16_t pshdr_csum;
395	int rc;
396
397	/* Sanity check the IPv4 header */
398	if ( iob_len ( iobuf ) < sizeof ( *iphdr ) ) {
399		DBG ( "IPv4 packet too short at %zd bytes (min %zd bytes)\n",
400		      iob_len ( iobuf ), sizeof ( *iphdr ) );
401		goto err;
402	}
403	if ( ( iphdr->verhdrlen & IP_MASK_VER ) != IP_VER ) {
404		DBG ( "IPv4 version %#02x not supported\n", iphdr->verhdrlen );
405		goto err;
406	}
407	hdrlen = ( ( iphdr->verhdrlen & IP_MASK_HLEN ) * 4 );
408	if ( hdrlen < sizeof ( *iphdr ) ) {
409		DBG ( "IPv4 header too short at %zd bytes (min %zd bytes)\n",
410		      hdrlen, sizeof ( *iphdr ) );
411		goto err;
412	}
413	if ( hdrlen > iob_len ( iobuf ) ) {
414		DBG ( "IPv4 header too long at %zd bytes "
415		      "(packet is %zd bytes)\n", hdrlen, iob_len ( iobuf ) );
416		goto err;
417	}
418	if ( ( csum = tcpip_chksum ( iphdr, hdrlen ) ) != 0 ) {
419		DBG ( "IPv4 checksum incorrect (is %04x including checksum "
420		      "field, should be 0000)\n", csum );
421		goto err;
422	}
423	len = ntohs ( iphdr->len );
424	if ( len < hdrlen ) {
425		DBG ( "IPv4 length too short at %zd bytes "
426		      "(header is %zd bytes)\n", len, hdrlen );
427		goto err;
428	}
429	if ( len > iob_len ( iobuf ) ) {
430		DBG ( "IPv4 length too long at %zd bytes "
431		      "(packet is %zd bytes)\n", len, iob_len ( iobuf ) );
432		goto err;
433	}
434
435	/* Print IPv4 header for debugging */
436	DBG ( "IPv4 RX %s<-", inet_ntoa ( iphdr->dest ) );
437	DBG ( "%s len %d proto %d id %04x csum %04x\n",
438	      inet_ntoa ( iphdr->src ), ntohs ( iphdr->len ), iphdr->protocol,
439	      ntohs ( iphdr->ident ), ntohs ( iphdr->chksum ) );
440
441	/* Truncate packet to correct length, calculate pseudo-header
442	 * checksum and then strip off the IPv4 header.
443	 */
444	iob_unput ( iobuf, ( iob_len ( iobuf ) - len ) );
445	pshdr_csum = ipv4_pshdr_chksum ( iobuf, TCPIP_EMPTY_CSUM );
446	iob_pull ( iobuf, hdrlen );
447
448	/* Fragment reassembly */
449	if ( ( iphdr->frags & htons ( IP_MASK_MOREFRAGS ) ) ||
450	     ( ( iphdr->frags & htons ( IP_MASK_OFFSET ) ) != 0 ) ) {
451		/* Pass the fragment to ipv4_reassemble() which either
452		 * returns a fully reassembled I/O buffer or NULL.
453		 */
454		iobuf = ipv4_reassemble ( iobuf );
455		if ( ! iobuf )
456			return 0;
457	}
458
459	/* Construct socket addresses and hand off to transport layer */
460	memset ( &src, 0, sizeof ( src ) );
461	src.sin.sin_family = AF_INET;
462	src.sin.sin_addr = iphdr->src;
463	memset ( &dest, 0, sizeof ( dest ) );
464	dest.sin.sin_family = AF_INET;
465	dest.sin.sin_addr = iphdr->dest;
466	if ( ( rc = tcpip_rx ( iobuf, iphdr->protocol, &src.st,
467			       &dest.st, pshdr_csum ) ) != 0 ) {
468		DBG ( "IPv4 received packet rejected by stack: %s\n",
469		      strerror ( rc ) );
470		return rc;
471	}
472
473	return 0;
474
475 err:
476	free_iob ( iobuf );
477	return -EINVAL;
478}
479
480/**
481 * Check existence of IPv4 address for ARP
482 *
483 * @v netdev		Network device
484 * @v net_addr		Network-layer address
485 * @ret rc		Return status code
486 */
487static int ipv4_arp_check ( struct net_device *netdev, const void *net_addr ) {
488	const struct in_addr *address = net_addr;
489	struct ipv4_miniroute *miniroute;
490
491	list_for_each_entry ( miniroute, &ipv4_miniroutes, list ) {
492		if ( ( miniroute->netdev == netdev ) &&
493		     ( miniroute->address.s_addr == address->s_addr ) ) {
494			/* Found matching address */
495			return 0;
496		}
497	}
498	return -ENOENT;
499}
500
/**
 * Format an IPv4 address as a dotted-quad string
 *
 * @v in	IP address
 * @ret string	IP address in dotted-quad notation
 *
 * The result is held in a single static buffer, so each call
 * overwrites the string returned by the previous one.
 */
char * inet_ntoa ( struct in_addr in ) {
	/* Room for "xxx.xxx.xxx.xxx" plus the terminating NUL */
	static char text[16];
	const uint8_t *octet = ( ( const uint8_t * ) &in );

	/* Octets are printed in the order in which they sit in memory */
	sprintf ( text, "%d.%d.%d.%d",
		  octet[0], octet[1], octet[2], octet[3] );

	return text;
}
514
/**
 * Transcribe IP address
 *
 * @v net_addr	IP address
 * @ret string	IP address in dotted-quad notation
 *
 * Thin wrapper around inet_ntoa() taking the address by pointer; the
 * returned string is whatever inet_ntoa() returns.
 */
static const char * ipv4_ntoa ( const void *net_addr ) {
	const struct in_addr *in = net_addr;

	return inet_ntoa ( *in );
}
525
/** IPv4 protocol
 *
 * Network-layer protocol descriptor: packets with protocol number
 * ETH_P_IP (stored in network byte order) are handled by ipv4_rx(),
 * addresses are the size of a struct in_addr, and ipv4_ntoa() is used
 * to transcribe them.
 */
struct net_protocol ipv4_protocol __net_protocol = {
	.name = "IP",
	.net_proto = htons ( ETH_P_IP ),
	.net_addr_len = sizeof ( struct in_addr ),
	.rx = ipv4_rx,
	.ntoa = ipv4_ntoa,
};
534
/** IPv4 TCPIP net protocol
 *
 * Associates the AF_INET socket address family with ipv4_tx() as the
 * transmit routine.
 */
struct tcpip_net_protocol ipv4_tcpip_protocol __tcpip_net_protocol = {
	.name = "IPv4",
	.sa_family = AF_INET,
	.tx = ipv4_tx,
};
541
/** IPv4 ARP protocol
 *
 * Ties ARP to the IPv4 network-layer protocol, with ipv4_arp_check()
 * as the test for whether an address is configured on a device.
 */
struct arp_net_protocol ipv4_arp_protocol __arp_net_protocol = {
	.net_protocol = &ipv4_protocol,
	.check = ipv4_arp_check,
};
547
548/******************************************************************************
549 *
550 * Settings
551 *
552 ******************************************************************************
553 */
554
/** IPv4 address setting
 *
 * Named "ip", tagged DHCP_EB_YIADDR and typed as an IPv4 address.
 * Read per network device by ipv4_create_routes(); a device without
 * this setting gets no route.
 */
struct setting ip_setting __setting = {
	.name = "ip",
	.description = "IPv4 address",
	.tag = DHCP_EB_YIADDR,
	.type = &setting_type_ipv4,
};
562
/** IPv4 subnet mask setting
 *
 * Named "netmask", tagged DHCP_SUBNET_MASK and typed as an IPv4
 * address.  Read per network device by ipv4_create_routes(), which
 * falls back to a class-based mask when the value is zero.
 */
struct setting netmask_setting __setting = {
	.name = "netmask",
	.description = "IPv4 subnet mask",
	.tag = DHCP_SUBNET_MASK,
	.type = &setting_type_ipv4,
};
570
/** Default gateway setting
 *
 * Named "gateway", tagged DHCP_ROUTERS and typed as an IPv4 address.
 * Read per network device by ipv4_create_routes().
 */
struct setting gateway_setting __setting = {
	.name = "gateway",
	.description = "Default gateway",
	.tag = DHCP_ROUTERS,
	.type = &setting_type_ipv4,
};
578
579/**
580 * Create IPv4 routing table based on configured settings
581 *
582 * @ret rc		Return status code
583 */
584static int ipv4_create_routes ( void ) {
585	struct ipv4_miniroute *miniroute;
586	struct ipv4_miniroute *tmp;
587	struct net_device *netdev;
588	struct settings *settings;
589	struct in_addr address = { 0 };
590	struct in_addr netmask = { 0 };
591	struct in_addr gateway = { 0 };
592
593	/* Delete all existing routes */
594	list_for_each_entry_safe ( miniroute, tmp, &ipv4_miniroutes, list )
595		del_ipv4_miniroute ( miniroute );
596
597	/* Create a route for each configured network device */
598	for_each_netdev ( netdev ) {
599		settings = netdev_settings ( netdev );
600		/* Get IPv4 address */
601		address.s_addr = 0;
602		fetch_ipv4_setting ( settings, &ip_setting, &address );
603		if ( ! address.s_addr )
604			continue;
605		/* Get subnet mask */
606		fetch_ipv4_setting ( settings, &netmask_setting, &netmask );
607		/* Calculate default netmask, if necessary */
608		if ( ! netmask.s_addr ) {
609			if ( IN_CLASSA ( ntohl ( address.s_addr ) ) ) {
610				netmask.s_addr = htonl ( IN_CLASSA_NET );
611			} else if ( IN_CLASSB ( ntohl ( address.s_addr ) ) ) {
612				netmask.s_addr = htonl ( IN_CLASSB_NET );
613			} else if ( IN_CLASSC ( ntohl ( address.s_addr ) ) ) {
614				netmask.s_addr = htonl ( IN_CLASSC_NET );
615			}
616		}
617		/* Get default gateway, if present */
618		fetch_ipv4_setting ( settings, &gateway_setting, &gateway );
619		/* Configure route */
620		miniroute = add_ipv4_miniroute ( netdev, address,
621						 netmask, gateway );
622		if ( ! miniroute )
623			return -ENOMEM;
624	}
625
626	return 0;
627}
628
/** IPv4 settings applicator
 *
 * Registers ipv4_create_routes() as the function that applies
 * settings, so that the routing table is rebuilt from the configured
 * settings.
 */
struct settings_applicator ipv4_settings_applicator __settings_applicator = {
	.apply = ipv4_create_routes,
};
633
/* Drag in ICMP
 *
 * NOTE(review): presumably forces the icmp object to be linked in
 * whenever this file is, even though nothing here refers to it by
 * name - confirm against the REQUIRE_OBJECT definition.
 */
REQUIRE_OBJECT ( icmp );
636