1/*
2 * Copyright (C) 2006 Michael Brown <mbrown@fensystems.co.uk>.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License as
6 * published by the Free Software Foundation; either version 2 of the
7 * License, or any later version.
8 *
9 * This program is distributed in the hope that it will be useful, but
10 * WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
12 * General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
17 */
18
19FILE_LICENCE ( GPL2_OR_LATER );
20
21#include <string.h>
22#include <stdlib.h>
23#include <stdio.h>
24#include <ctype.h>
25#include <errno.h>
26#include <assert.h>
27#include <byteswap.h>
28#include <gpxe/if_ether.h>
29#include <gpxe/netdevice.h>
30#include <gpxe/device.h>
31#include <gpxe/xfer.h>
32#include <gpxe/open.h>
33#include <gpxe/job.h>
34#include <gpxe/retry.h>
35#include <gpxe/tcpip.h>
36#include <gpxe/ip.h>
37#include <gpxe/uuid.h>
38#include <gpxe/timer.h>
39#include <gpxe/settings.h>
40#include <gpxe/dhcp.h>
41#include <gpxe/dhcpopts.h>
42#include <gpxe/dhcppkt.h>
43#include <gpxe/features.h>
44
45/** @file
46 *
47 * Dynamic Host Configuration Protocol
48 *
49 */
50
51struct dhcp_session;
52static int dhcp_tx ( struct dhcp_session *dhcp );
53
/**
 * DHCP operation types
 *
 * This table maps from DHCP message types (i.e. values of the @c
 * DHCP_MESSAGE_TYPE option) to values of the "op" field within a DHCP
 * packet.
 *
 * The table is indexed directly by message type;
 * dhcp_create_packet() reads dhcp_op[msgtype] to fill in the header.
 * Indices below the largest listed one that have no explicit entry
 * are implicitly zero.
 */
static const uint8_t dhcp_op[] = {
	[DHCPDISCOVER]	= BOOTP_REQUEST,
	[DHCPOFFER]	= BOOTP_REPLY,
	[DHCPREQUEST]	= BOOTP_REQUEST,
	[DHCPDECLINE]	= BOOTP_REQUEST,
	[DHCPACK]	= BOOTP_REPLY,
	[DHCPNAK]	= BOOTP_REPLY,
	[DHCPRELEASE]	= BOOTP_REQUEST,
	[DHCPINFORM]	= BOOTP_REQUEST,
};
71
/**
 * Raw option data for options common to all DHCP requests
 *
 * dhcp_create_request() passes this block to dhcp_create_packet() as
 * the initial option content of every request.
 */
static uint8_t dhcp_request_options_data[] = {
	/* Placeholder value; dhcp_create_packet() stores the real
	 * DHCP_MESSAGE_TYPE after copying these options.
	 */
	DHCP_MESSAGE_TYPE, DHCP_BYTE ( 0 ),
	/* Largest message we accept: MTU less IP and UDP headers */
	DHCP_MAX_MESSAGE_SIZE,
	DHCP_WORD ( ETH_MAX_MTU - 20 /* IP header */ - 8 /* UDP header */ ),
	DHCP_CLIENT_ARCHITECTURE, DHCP_WORD ( 0 ),
	DHCP_CLIENT_NDI, DHCP_OPTION ( 1 /* UNDI */ , 2, 1 /* v2.1 */ ),
	/* Vendor class "PXEClient:Arch:00000:UNDI:002001" */
	DHCP_VENDOR_CLASS_ID,
	DHCP_STRING (  'P', 'X', 'E', 'C', 'l', 'i', 'e', 'n', 't', ':',
		       'A', 'r', 'c', 'h', ':', '0', '0', '0', '0', '0', ':',
		       'U', 'N', 'D', 'I', ':', '0', '0', '2', '0', '0', '1' ),
	/* User class "gPXE" */
	DHCP_USER_CLASS_ID,
	DHCP_STRING ( 'g', 'P', 'X', 'E' ),
	/* Options that we ask the server to supply */
	DHCP_PARAMETER_REQUEST_LIST,
	DHCP_OPTION ( DHCP_SUBNET_MASK, DHCP_ROUTERS, DHCP_DNS_SERVERS,
		      DHCP_LOG_SERVERS, DHCP_HOST_NAME, DHCP_DOMAIN_NAME,
		      DHCP_ROOT_PATH, DHCP_VENDOR_ENCAP, DHCP_VENDOR_CLASS_ID,
		      DHCP_TFTP_SERVER_NAME, DHCP_BOOTFILE_NAME,
		      DHCP_EB_ENCAP, DHCP_ISCSI_INITIATOR_IQN ),
	DHCP_END
};
93
/**
 * Version number feature
 *
 * Passes the build's major, minor and patch version numbers to the
 * FEATURE_VERSION() macro, which is declared outside this file
 * (presumably in gpxe/features.h -- TODO confirm).
 */
FEATURE_VERSION ( VERSION_MAJOR, VERSION_MINOR, VERSION_PATCH );
96
/**
 * DHCP server address setting
 *
 * Named "dhcp-server"; bound to the DHCP_SERVER_IDENTIFIER option tag
 * and typed as an IPv4 address.
 */
struct setting dhcp_server_setting __setting = {
	.name = "dhcp-server",
	.description = "DHCP server address",
	.tag = DHCP_SERVER_IDENTIFIER,
	.type = &setting_type_ipv4,
};
104
/**
 * DHCP user class setting
 *
 * Named "user-class"; bound to the DHCP_USER_CLASS_ID option tag and
 * typed as a string.
 */
struct setting user_class_setting __setting = {
	.name = "user-class",
	.description = "User class identifier",
	.tag = DHCP_USER_CLASS_ID,
	.type = &setting_type_string,
};
112
/**
 * Use cached network settings
 *
 * Named "use-cached"; bound to the DHCP_EB_USE_CACHED option tag and
 * typed as an unsigned 8-bit integer.
 */
struct setting use_cached_setting __setting = {
	.name = "use-cached",
	.description = "Use cached network settings",
	.tag = DHCP_EB_USE_CACHED,
	.type = &setting_type_uint8,
};
120
121/**
122 * Name a DHCP packet type
123 *
124 * @v msgtype		DHCP message type
125 * @ret string		DHCP mesasge type name
126 */
127static inline const char * dhcp_msgtype_name ( unsigned int msgtype ) {
128	switch ( msgtype ) {
129	case DHCPNONE:		return "BOOTP"; /* Non-DHCP packet */
130	case DHCPDISCOVER:	return "DHCPDISCOVER";
131	case DHCPOFFER:		return "DHCPOFFER";
132	case DHCPREQUEST:	return "DHCPREQUEST";
133	case DHCPDECLINE:	return "DHCPDECLINE";
134	case DHCPACK:		return "DHCPACK";
135	case DHCPNAK:		return "DHCPNAK";
136	case DHCPRELEASE:	return "DHCPRELEASE";
137	case DHCPINFORM:	return "DHCPINFORM";
138	default:		return "DHCP<invalid>";
139	}
140}
141
142/**
143 * Calculate DHCP transaction ID for a network device
144 *
145 * @v netdev		Network device
146 * @ret xid		DHCP XID
147 *
148 * Extract the least significant bits of the hardware address for use
149 * as the transaction ID.
150 */
151static uint32_t dhcp_xid ( struct net_device *netdev ) {
152	uint32_t xid;
153
154	memcpy ( &xid, ( netdev->ll_addr + netdev->ll_protocol->ll_addr_len
155			 - sizeof ( xid ) ), sizeof ( xid ) );
156	return xid;
157}
158
159/****************************************************************************
160 *
161 * DHCP session
162 *
163 */
164
165struct dhcp_session;
166
/**
 * DHCP session state operations
 *
 * Each state of the DHCP state machine is described by one instance
 * of this structure.  dhcp_set_state() installs a state into a
 * session.
 */
struct dhcp_session_state {
	/** State name (printed in debug messages by dhcp_set_state()) */
	const char *name;
	/**
	 * Construct transmitted packet
	 *
	 * @v dhcp		DHCP session
	 * @v dhcppkt		DHCP packet
	 * @v peer		Destination address
	 * @ret rc		Return status code
	 */
	int ( * tx ) ( struct dhcp_session *dhcp,
		       struct dhcp_packet *dhcppkt,
		       struct sockaddr_in *peer );
	/** Handle received packet
	 *
	 * @v dhcp		DHCP session
	 * @v dhcppkt		DHCP packet
	 * @v peer		DHCP server address
	 * @v msgtype		DHCP message type
	 * @v server_id		DHCP server ID
	 */
	void ( * rx ) ( struct dhcp_session *dhcp,
			struct dhcp_packet *dhcppkt,
			struct sockaddr_in *peer,
			uint8_t msgtype, struct in_addr server_id );
	/** Handle timer expiry
	 *
	 * @v dhcp		DHCP session
	 */
	void ( * expired ) ( struct dhcp_session *dhcp );
	/** Transmitted message type */
	uint8_t tx_msgtype;
	/** Apply minimum timeout
	 *
	 * When non-zero, dhcp_set_state() sets the retry timer's
	 * minimum timeout to DHCP_MIN_TIMEOUT; otherwise to zero.
	 */
	uint8_t apply_min_timeout;
};
203
204static struct dhcp_session_state dhcp_state_discover;
205static struct dhcp_session_state dhcp_state_request;
206static struct dhcp_session_state dhcp_state_proxy;
207static struct dhcp_session_state dhcp_state_pxebs;
208
/** DHCP offer is valid for IP lease
 *
 * Bit flag stored in the "valid" field of struct dhcp_offer.
 */
#define DHCP_OFFER_IP	1

/** DHCP offer is valid for PXE options
 *
 * Bit flag stored in the "valid" field of struct dhcp_offer.
 */
#define DHCP_OFFER_PXE	2
214
/**
 * A DHCP offer
 *
 * One slot in the per-session offer table.  A slot whose "valid"
 * field is zero is treated as free by dhcp_rx_offer().
 */
struct dhcp_offer {
	/** IP address of server granting offer */
	struct in_addr server;

	/** IP address being offered, or 0.0.0.0 for a pure proxy */
	struct in_addr ip;

	/** DHCP packet containing PXE options; NULL if missing or proxied
	 *
	 * A non-NULL value holds a reference obtained via
	 * dhcppkt_get(); dhcp_free() drops it with dhcppkt_put().
	 */
	struct dhcp_packet *pxe;

	/** Valid uses for this offer, a combination of DHCP_OFFER bits */
	uint8_t valid;

	/** Priority of this offer
	 *
	 * Higher values are preferred by dhcp_next_offer().
	 */
	int8_t priority;

	/** Whether to ignore PXE DHCP extensions */
	uint8_t no_pxedhcp;
};
235
/** Maximum number of DHCP offers to queue
 *
 * This is the size of the "offers" array in struct dhcp_session.
 */
#define DHCP_MAX_OFFERS   6
238
/**
 * A DHCP session
 *
 * Holds everything needed to run the DHCP state machine for one
 * network device.  Freed via dhcp_free() through the reference
 * counter.
 */
struct dhcp_session {
	/** Reference counter */
	struct refcnt refcnt;
	/** Job control interface */
	struct job_interface job;
	/** Data transfer interface */
	struct xfer_interface xfer;

	/** Network device being configured */
	struct net_device *netdev;
	/** Local socket address */
	struct sockaddr_in local;
	/** State of the session */
	struct dhcp_session_state *state;

	/** PXE Boot Server type */
	uint16_t pxe_type;
	/** List of PXE Boot Servers to attempt
	 *
	 * Walked by dhcp_pxebs_expired() until an entry with a zero
	 * address is reached.
	 */
	struct in_addr *pxe_attempt;
	/** List of PXE Boot Servers to accept
	 *
	 * May be NULL, in which case dhcp_pxebs_accept() accepts any
	 * server; otherwise terminated by an entry with a zero address.
	 */
	struct in_addr *pxe_accept;

	/** Retransmission timer */
	struct retry_timer timer;
	/** Start time of the current state (in ticks) */
	unsigned long start;

	/** DHCP offer just requested */
	struct dhcp_offer *current_offer;
	/** List of DHCP offers received */
	struct dhcp_offer offers[DHCP_MAX_OFFERS];
};
272
273/**
274 * Free DHCP session
275 *
276 * @v refcnt		Reference counter
277 */
278static void dhcp_free ( struct refcnt *refcnt ) {
279	struct dhcp_session *dhcp =
280		container_of ( refcnt, struct dhcp_session, refcnt );
281	int i;
282
283	for ( i = 0 ; i < DHCP_MAX_OFFERS ; i++ ) {
284		if ( dhcp->offers[i].pxe )
285			dhcppkt_put ( dhcp->offers[i].pxe );
286	}
287
288	netdev_put ( dhcp->netdev );
289	free ( dhcp );
290}
291
/**
 * Mark DHCP session as complete
 *
 * @v dhcp		DHCP session
 * @v rc		Return status code
 *
 * Nullifies both interfaces, stops the retry timer, then closes the
 * data transfer interface and reports completion on the job
 * interface, passing @c rc to each.
 */
static void dhcp_finished ( struct dhcp_session *dhcp, int rc ) {

	/* Block further incoming messages */
	job_nullify ( &dhcp->job );
	xfer_nullify ( &dhcp->xfer );

	/* Stop retry timer */
	stop_timer ( &dhcp->timer );

	/* Free resources and close interfaces */
	xfer_close ( &dhcp->xfer, rc );
	job_done ( &dhcp->job, rc );
}
311
312/**
313 * Transition to new DHCP session state
314 *
315 * @v dhcp		DHCP session
316 * @v state		New session state
317 */
318static void dhcp_set_state ( struct dhcp_session *dhcp,
319			     struct dhcp_session_state *state ) {
320
321	DBGC ( dhcp, "DHCP %p entering %s state\n", dhcp, state->name );
322	dhcp->state = state;
323	dhcp->start = currticks();
324	stop_timer ( &dhcp->timer );
325	dhcp->timer.min_timeout =
326		( state->apply_min_timeout ? DHCP_MIN_TIMEOUT : 0 );
327	dhcp->timer.max_timeout = DHCP_MAX_TIMEOUT;
328	start_timer_nodelay ( &dhcp->timer );
329}
330
331/**
332 * Determine next DHCP offer to try
333 *
334 * @v dhcp		DHCP session
335 * @v type		DHCP offer type
336 * @ret offer		Next DHCP offer to try
337 *
338 * Offers are ranked by priority, then by completeness (combined
339 * IP+PXE are tried before @a type alone), then by order of receipt.
340 */
341static struct dhcp_offer * dhcp_next_offer ( struct dhcp_session *dhcp,
342					     uint8_t type ) {
343
344	struct dhcp_offer *offer;
345	struct dhcp_offer *best = NULL;
346
347	for ( offer = dhcp->offers ; offer < dhcp->offers + DHCP_MAX_OFFERS ;
348	      offer++ ) {
349		if ( ( offer->valid & type ) &&
350		     ( ( best == NULL ) ||
351		       ( offer->priority > best->priority ) ||
352		       ( ( offer->priority == best->priority ) &&
353			 ( offer->valid & ~best->valid ) ) ) )
354			best = offer;
355	}
356
357	return best;
358}
359
360/****************************************************************************
361 *
362 * DHCP state machine
363 *
364 */
365
366/**
367 * Construct transmitted packet for DHCP discovery
368 *
369 * @v dhcp		DHCP session
370 * @v dhcppkt		DHCP packet
371 * @v peer		Destination address
372 */
373static int dhcp_discovery_tx ( struct dhcp_session *dhcp,
374			       struct dhcp_packet *dhcppkt __unused,
375			       struct sockaddr_in *peer ) {
376
377	DBGC ( dhcp, "DHCP %p DHCPDISCOVER\n", dhcp );
378
379	/* Set server address */
380	peer->sin_addr.s_addr = INADDR_BROADCAST;
381	peer->sin_port = htons ( BOOTPS_PORT );
382
383	return 0;
384}
385
386/**
387 * Handle received DHCPOFFER during any state
388 *
389 * @v dhcp		DHCP session
390 * @v dhcppkt		DHCP packet
391 * @v peer		DHCP server address
392 * @v msgtype		DHCP message type
393 * @v server_id		DHCP server ID
394 */
395static void dhcp_rx_offer ( struct dhcp_session *dhcp,
396			    struct dhcp_packet *dhcppkt,
397			    struct sockaddr_in *peer, uint8_t msgtype,
398			    struct in_addr server_id ) {
399	char vci[9]; /* "PXEClient" */
400	int vci_len;
401	int has_pxeclient;
402	int pxeopts_len;
403	int has_pxeopts;
404	struct dhcp_offer *offer;
405	int i;
406
407	DBGC ( dhcp, "DHCP %p %s from %s:%d", dhcp,
408	       dhcp_msgtype_name ( msgtype ), inet_ntoa ( peer->sin_addr ),
409	       ntohs ( peer->sin_port ) );
410	if ( server_id.s_addr != peer->sin_addr.s_addr )
411		DBGC ( dhcp, " (%s)", inet_ntoa ( server_id ) );
412
413	/* Identify offered IP address */
414	if ( dhcppkt->dhcphdr->yiaddr.s_addr )
415		DBGC ( dhcp, " for %s", inet_ntoa ( dhcppkt->dhcphdr->yiaddr ));
416
417	/* Enqueue an offer to be filled in */
418	for ( i = 0 ; i < DHCP_MAX_OFFERS ; i++ ) {
419		if ( dhcp->offers[i].server.s_addr == server_id.s_addr ) {
420			DBGC ( dhcp, " dup\n" );
421			return;
422		}
423
424		if ( ! dhcp->offers[i].valid )
425			break;
426	}
427	if ( i == DHCP_MAX_OFFERS ) {
428		DBGC ( dhcp, " dropped\n" );
429		return;
430	}
431
432	offer = &dhcp->offers[i];
433	offer->server = server_id;
434	offer->ip = dhcppkt->dhcphdr->yiaddr;
435
436	/* Identify "PXEClient" vendor class */
437	vci_len = dhcppkt_fetch ( dhcppkt, DHCP_VENDOR_CLASS_ID,
438				  vci, sizeof ( vci ) );
439	has_pxeclient = ( ( vci_len >= ( int ) sizeof ( vci ) ) &&
440			  ( strncmp ( "PXEClient", vci, sizeof (vci) ) == 0 ));
441
442	/* Identify presence of PXE-specific options */
443	pxeopts_len = dhcppkt_fetch ( dhcppkt, DHCP_PXE_BOOT_MENU, NULL, 0 );
444	has_pxeopts = ( pxeopts_len >= 0 );
445	if ( has_pxeclient )
446		DBGC ( dhcp, "%s", ( has_pxeopts ? " pxe" : " proxy" ) );
447
448	if ( has_pxeclient && has_pxeopts ) {
449		/* Save reference to packet for future use */
450		if ( offer->pxe )
451			dhcppkt_put ( offer->pxe );
452		offer->pxe = dhcppkt_get ( dhcppkt );
453	}
454
455	/* Identify priority */
456	dhcppkt_fetch ( dhcppkt, DHCP_EB_PRIORITY, &offer->priority,
457			sizeof ( offer->priority ) );
458	if ( offer->priority )
459		DBGC ( dhcp, " pri %d", offer->priority );
460
461	/* Identify ignore-PXE flag */
462	dhcppkt_fetch ( dhcppkt, DHCP_EB_NO_PXEDHCP, &offer->no_pxedhcp,
463			sizeof ( offer->no_pxedhcp ) );
464	if ( offer->no_pxedhcp )
465		DBGC ( dhcp, " nopxe" );
466	DBGC ( dhcp, "\n" );
467
468	/* Determine roles this offer can fill */
469	if ( offer->ip.s_addr &&
470	     ( peer->sin_port == htons ( BOOTPS_PORT ) ) &&
471	     ( ( msgtype == DHCPOFFER ) || ( ! msgtype /* BOOTP */ ) ) )
472		offer->valid |= DHCP_OFFER_IP;
473
474	if ( has_pxeclient && ( msgtype == DHCPOFFER ) )
475		offer->valid |= DHCP_OFFER_PXE;
476}
477
478/**
479 * Handle received packet during DHCP discovery
480 *
481 * @v dhcp		DHCP session
482 * @v dhcppkt		DHCP packet
483 * @v peer		DHCP server address
484 * @v msgtype		DHCP message type
485 * @v server_id		DHCP server ID
486 */
487static void dhcp_discovery_rx ( struct dhcp_session *dhcp,
488				struct dhcp_packet *dhcppkt,
489				struct sockaddr_in *peer, uint8_t msgtype,
490				struct in_addr server_id ) {
491	unsigned long elapsed;
492	struct dhcp_offer *ip_offer;
493
494	dhcp_rx_offer ( dhcp, dhcppkt, peer, msgtype, server_id );
495
496	/* We can exit the discovery state when we have a valid
497	 * DHCPOFFER, and either:
498	 *
499	 *  o  The DHCPOFFER instructs us to ignore ProxyDHCPOFFERs, or
500	 *  o  We have a valid ProxyDHCPOFFER, or
501	 *  o  We have allowed sufficient time for ProxyDHCPOFFERs.
502	 */
503
504	/* If we don't yet have a DHCPOFFER, do nothing */
505	ip_offer = dhcp_next_offer ( dhcp, DHCP_OFFER_IP );
506	if ( ! ip_offer )
507		return;
508
509	/* If we can't yet transition to DHCPREQUEST, do nothing */
510	elapsed = ( currticks() - dhcp->start );
511	if ( ! ( ip_offer->no_pxedhcp ||
512		 dhcp_next_offer ( dhcp, DHCP_OFFER_PXE ) ||
513		 ( elapsed > PROXYDHCP_MAX_TIMEOUT ) ) )
514		return;
515
516	/* Transition to DHCPREQUEST */
517	dhcp_set_state ( dhcp, &dhcp_state_request );
518}
519
520/**
521 * Handle timer expiry during DHCP discovery
522 *
523 * @v dhcp		DHCP session
524 */
525static void dhcp_discovery_expired ( struct dhcp_session *dhcp ) {
526	unsigned long elapsed = ( currticks() - dhcp->start );
527
528	/* Give up waiting for ProxyDHCP before we reach the failure point */
529	if ( dhcp_next_offer ( dhcp, DHCP_OFFER_IP ) &&
530	     ( elapsed > PROXYDHCP_MAX_TIMEOUT ) ) {
531		dhcp_set_state ( dhcp, &dhcp_state_request );
532		return;
533	}
534
535	/* Otherwise, retransmit current packet */
536	dhcp_tx ( dhcp );
537}
538
/**
 * DHCP discovery state operations
 *
 * Transmits DHCPDISCOVER; the minimum retry timeout is applied when
 * this state is entered (see dhcp_set_state()).
 */
static struct dhcp_session_state dhcp_state_discover = {
	.name			= "discovery",
	.tx			= dhcp_discovery_tx,
	.rx			= dhcp_discovery_rx,
	.expired		= dhcp_discovery_expired,
	.tx_msgtype		= DHCPDISCOVER,
	.apply_min_timeout	= 1,
};
548
549/**
550 * Construct transmitted packet for DHCP request
551 *
552 * @v dhcp		DHCP session
553 * @v dhcppkt		DHCP packet
554 * @v peer		Destination address
555 */
556static int dhcp_request_tx ( struct dhcp_session *dhcp,
557			     struct dhcp_packet *dhcppkt,
558			     struct sockaddr_in *peer ) {
559	int rc;
560	struct dhcp_offer *offer;
561
562	offer = dhcp->current_offer = dhcp_next_offer ( dhcp, DHCP_OFFER_IP );
563
564	DBGC ( dhcp, "DHCP %p DHCPREQUEST to %s:%d",
565	       dhcp, inet_ntoa ( offer->server ), BOOTPS_PORT );
566	DBGC ( dhcp, " for %s\n", inet_ntoa ( offer->ip ) );
567
568	/* Set server ID */
569	if ( ( rc = dhcppkt_store ( dhcppkt, DHCP_SERVER_IDENTIFIER,
570				    &offer->server,
571				    sizeof ( offer->server ) ) ) != 0 )
572		return rc;
573
574	/* Set requested IP address */
575	if ( ( rc = dhcppkt_store ( dhcppkt, DHCP_REQUESTED_ADDRESS,
576				    &offer->ip, sizeof ( offer->ip ) ) ) != 0 )
577		return rc;
578
579	/* Set server address */
580	peer->sin_addr.s_addr = INADDR_BROADCAST;
581	peer->sin_port = htons ( BOOTPS_PORT );
582
583	return 0;
584}
585
586/**
587 * Handle received packet during DHCP request
588 *
589 * @v dhcp		DHCP session
590 * @v dhcppkt		DHCP packet
591 * @v peer		DHCP server address
592 * @v msgtype		DHCP message type
593 * @v server_id		DHCP server ID
594 */
595static void dhcp_request_rx ( struct dhcp_session *dhcp,
596			      struct dhcp_packet *dhcppkt,
597			      struct sockaddr_in *peer, uint8_t msgtype,
598			      struct in_addr server_id ) {
599	struct in_addr ip;
600	struct settings *parent;
601	int rc;
602	struct dhcp_offer *pxe_offer;
603
604	if ( msgtype == DHCPOFFER ) {
605		dhcp_rx_offer ( dhcp, dhcppkt, peer, msgtype, server_id );
606		if ( dhcp_next_offer ( dhcp, DHCP_OFFER_IP ) !=
607		     dhcp->current_offer ) {
608			/* Restart due to higher-priority offer received */
609			DBGC ( dhcp, "DHCP %p re-requesting\n", dhcp );
610			dhcp_set_state ( dhcp, &dhcp_state_request );
611		}
612		return;
613	}
614
615	DBGC ( dhcp, "DHCP %p %s from %s:%d", dhcp,
616	       dhcp_msgtype_name ( msgtype ), inet_ntoa ( peer->sin_addr ),
617	       ntohs ( peer->sin_port ) );
618	if ( server_id.s_addr != peer->sin_addr.s_addr )
619		DBGC ( dhcp, " (%s)", inet_ntoa ( server_id ) );
620
621	/* Identify leased IP address */
622	ip = dhcppkt->dhcphdr->yiaddr;
623	if ( ip.s_addr )
624		DBGC ( dhcp, " for %s", inet_ntoa ( ip ) );
625	DBGC ( dhcp, "\n" );
626
627	/* Filter out unacceptable responses */
628	if ( peer->sin_port != htons ( BOOTPS_PORT ) )
629		return;
630	if ( msgtype /* BOOTP */ && ( msgtype != DHCPACK ) )
631		return;
632	if ( server_id.s_addr != dhcp->current_offer->server.s_addr )
633		return;
634
635	/* Record assigned address */
636	dhcp->local.sin_addr = ip;
637
638	/* Register settings */
639	parent = netdev_settings ( dhcp->netdev );
640	if ( ( rc = register_settings ( &dhcppkt->settings, parent ) ) != 0 ){
641		DBGC ( dhcp, "DHCP %p could not register settings: %s\n",
642		       dhcp, strerror ( rc ) );
643		dhcp_finished ( dhcp, rc );
644		return;
645	}
646
647	/* Locate best source of PXE settings */
648	pxe_offer = dhcp_next_offer ( dhcp, DHCP_OFFER_PXE );
649
650	if ( ( ! pxe_offer ) || /* No PXE available */
651	     /* IP offer instructs us to ignore PXE */
652	     dhcp->current_offer->no_pxedhcp ||
653	     /* PXE settings already registered with IP offer */
654	     ( ( dhcp->current_offer == pxe_offer ) && ( pxe_offer->pxe ) ) ) {
655
656		/* Terminate DHCP */
657		dhcp_finished ( dhcp, 0 );
658
659	} else if ( pxe_offer->pxe ) {
660		/* Register PXE settings and terminate DHCP */
661		pxe_offer->pxe->settings.name = PROXYDHCP_SETTINGS_NAME;
662		if ( ( rc = register_settings ( &pxe_offer->pxe->settings,
663						NULL ) ) != 0 ) {
664			DBGC ( dhcp, "DHCP %p could not register settings: "
665			       "%s\n", dhcp, strerror ( rc ) );
666		}
667		dhcp_finished ( dhcp, rc );
668	} else {
669		/* Start ProxyDHCP */
670		dhcp_set_state ( dhcp, &dhcp_state_proxy );
671	}
672}
673
/**
 * Handle timer expiry during DHCP request
 *
 * @v dhcp		DHCP session
 *
 * The request state has no give-up condition of its own: every
 * expiry simply retransmits.  The status returned by dhcp_tx() is
 * not examined here.
 */
static void dhcp_request_expired ( struct dhcp_session *dhcp ) {

	/* Retransmit current packet */
	dhcp_tx ( dhcp );
}
684
/**
 * DHCP request state operations
 *
 * Transmits DHCPREQUEST; no minimum retry timeout is applied when
 * this state is entered (see dhcp_set_state()).
 */
static struct dhcp_session_state dhcp_state_request = {
	.name			= "request",
	.tx			= dhcp_request_tx,
	.rx			= dhcp_request_rx,
	.expired		= dhcp_request_expired,
	.tx_msgtype		= DHCPREQUEST,
	.apply_min_timeout	= 0,
};
694
695/**
696 * Construct transmitted packet for ProxyDHCP request
697 *
698 * @v dhcp		DHCP session
699 * @v dhcppkt		DHCP packet
700 * @v peer		Destination address
701 */
702static int dhcp_proxy_tx ( struct dhcp_session *dhcp,
703			   struct dhcp_packet *dhcppkt,
704			   struct sockaddr_in *peer ) {
705	int rc;
706	struct dhcp_offer *offer;
707
708	offer = dhcp->current_offer = dhcp_next_offer ( dhcp, DHCP_OFFER_PXE );
709
710	DBGC ( dhcp, "DHCP %p ProxyDHCP REQUEST to %s:%d\n", dhcp,
711	       inet_ntoa ( offer->server ), PXE_PORT );
712
713	/* Set server ID */
714	if ( ( rc = dhcppkt_store ( dhcppkt, DHCP_SERVER_IDENTIFIER,
715				    &offer->server,
716				    sizeof ( offer->server ) ) )  != 0 )
717		return rc;
718
719	/* Set server address */
720	peer->sin_addr = offer->server;
721	peer->sin_port = htons ( PXE_PORT );
722
723	return 0;
724}
725
726/**
727 * Handle received packet during ProxyDHCP request
728 *
729 * @v dhcp		DHCP session
730 * @v dhcppkt		DHCP packet
731 * @v peer		DHCP server address
732 * @v msgtype		DHCP message type
733 * @v server_id		DHCP server ID
734 */
735static void dhcp_proxy_rx ( struct dhcp_session *dhcp,
736			    struct dhcp_packet *dhcppkt,
737			    struct sockaddr_in *peer, uint8_t msgtype,
738			    struct in_addr server_id ) {
739	int rc;
740
741	/* Enqueue last-minute DHCPOFFERs for use in case of failure */
742	if ( peer->sin_port == htons ( BOOTPS_PORT ) &&
743	     msgtype == DHCPOFFER ) {
744		dhcp_rx_offer ( dhcp, dhcppkt, peer, msgtype, server_id );
745		return;
746	}
747
748	DBGC ( dhcp, "DHCP %p %s from %s:%d", dhcp,
749	       dhcp_msgtype_name ( msgtype ), inet_ntoa ( peer->sin_addr ),
750	       ntohs ( peer->sin_port ) );
751	if ( server_id.s_addr != peer->sin_addr.s_addr )
752		DBGC ( dhcp, " (%s)", inet_ntoa ( server_id ) );
753	DBGC ( dhcp, "\n" );
754
755	/* Filter out unacceptable responses */
756	if ( peer->sin_port != htons ( PXE_PORT ) )
757		return;
758	if ( msgtype != DHCPACK && msgtype != DHCPOFFER )
759		return;
760	if ( server_id.s_addr /* Linux PXE server omits server ID */ &&
761	     ( server_id.s_addr != dhcp->current_offer->server.s_addr ) )
762		return;
763
764	/* Register settings */
765	dhcppkt->settings.name = PROXYDHCP_SETTINGS_NAME;
766	if ( ( rc = register_settings ( &dhcppkt->settings, NULL ) ) != 0 ) {
767		DBGC ( dhcp, "DHCP %p could not register settings: %s\n",
768		       dhcp, strerror ( rc ) );
769		dhcp_finished ( dhcp, rc );
770		return;
771	}
772
773	/* Terminate DHCP */
774	dhcp_finished ( dhcp, 0 );
775}
776
777/**
778 * Handle timer expiry during ProxyDHCP request
779 *
780 * @v dhcp		DHCP session
781 */
782static void dhcp_proxy_expired ( struct dhcp_session *dhcp ) {
783	unsigned long elapsed = ( currticks() - dhcp->start );
784
785	/* Give up waiting for ProxyDHCP before we reach the failure point */
786	if ( elapsed > PROXYDHCP_MAX_TIMEOUT ) {
787
788		/* Mark failed offer as unsuitable for ProxyDHCP */
789		dhcp->current_offer->valid &= ~DHCP_OFFER_PXE;
790
791		/* Prefer not to use only half of a PXE+IP offer if we
792		 * have other offers available
793		 */
794		dhcp->current_offer->priority = -1;
795
796		/* If we have any other PXE offers we can try, go back
797		 * to DHCPREQUEST (since they might not be proxied
798		 * offers, or might be coupled to a new IP address).
799		 * We should probably DHCPRELEASE our old IP, but the
800		 * standard does not require it.
801		 */
802		if ( dhcp_next_offer ( dhcp, DHCP_OFFER_PXE ) ) {
803			dhcp->local.sin_addr.s_addr = 0;
804			dhcp_set_state ( dhcp, &dhcp_state_request );
805			return;
806		}
807
808		/* No possibilities left; finish without PXE options */
809		dhcp_finished ( dhcp, 0 );
810		return;
811	}
812
813	/* Retransmit current packet */
814	dhcp_tx ( dhcp );
815}
816
/**
 * ProxyDHCP request state operations
 *
 * Transmits DHCPREQUEST; no minimum retry timeout is applied when
 * this state is entered (see dhcp_set_state()).
 */
static struct dhcp_session_state dhcp_state_proxy = {
	.name			= "ProxyDHCP",
	.tx			= dhcp_proxy_tx,
	.rx			= dhcp_proxy_rx,
	.expired		= dhcp_proxy_expired,
	.tx_msgtype		= DHCPREQUEST,
	.apply_min_timeout	= 0,
};
826
827/**
828 * Construct transmitted packet for PXE Boot Server Discovery
829 *
830 * @v dhcp		DHCP session
831 * @v dhcppkt		DHCP packet
832 * @v peer		Destination address
833 */
834static int dhcp_pxebs_tx ( struct dhcp_session *dhcp,
835			   struct dhcp_packet *dhcppkt,
836			   struct sockaddr_in *peer ) {
837	struct dhcp_pxe_boot_menu_item menu_item = { 0, 0 };
838	int rc;
839
840	/* Set server address */
841	peer->sin_addr = *(dhcp->pxe_attempt);
842	peer->sin_port = ( ( peer->sin_addr.s_addr == INADDR_BROADCAST ) ?
843			   htons ( BOOTPS_PORT ) : htons ( PXE_PORT ) );
844
845	DBGC ( dhcp, "DHCP %p PXEBS REQUEST to %s:%d for type %d\n",
846	       dhcp, inet_ntoa ( peer->sin_addr ), ntohs ( peer->sin_port ),
847	       le16_to_cpu ( dhcp->pxe_type ) );
848
849	/* Set boot menu item */
850	menu_item.type = dhcp->pxe_type;
851	if ( ( rc = dhcppkt_store ( dhcppkt, DHCP_PXE_BOOT_MENU_ITEM,
852				    &menu_item, sizeof ( menu_item ) ) ) != 0 )
853		return rc;
854
855	return 0;
856}
857
858/**
859 * Check to see if PXE Boot Server address is acceptable
860 *
861 * @v dhcp		DHCP session
862 * @v bs		Boot Server address
863 * @ret accept		Boot Server is acceptable
864 */
865static int dhcp_pxebs_accept ( struct dhcp_session *dhcp,
866			       struct in_addr bs ) {
867	struct in_addr *accept;
868
869	/* Accept if we have no acceptance filter */
870	if ( ! dhcp->pxe_accept )
871		return 1;
872
873	/* Scan through acceptance list */
874	for ( accept = dhcp->pxe_accept ; accept->s_addr ; accept++ ) {
875		if ( accept->s_addr == bs.s_addr )
876			return 1;
877	}
878
879	DBGC ( dhcp, "DHCP %p rejecting server %s\n",
880	       dhcp, inet_ntoa ( bs ) );
881	return 0;
882}
883
884/**
885 * Handle received packet during PXE Boot Server Discovery
886 *
887 * @v dhcp		DHCP session
888 * @v dhcppkt		DHCP packet
889 * @v peer		DHCP server address
890 * @v msgtype		DHCP message type
891 * @v server_id		DHCP server ID
892 */
893static void dhcp_pxebs_rx ( struct dhcp_session *dhcp,
894			    struct dhcp_packet *dhcppkt,
895			    struct sockaddr_in *peer, uint8_t msgtype,
896			    struct in_addr server_id ) {
897	struct dhcp_pxe_boot_menu_item menu_item = { 0, 0 };
898	int rc;
899
900	DBGC ( dhcp, "DHCP %p %s from %s:%d", dhcp,
901	       dhcp_msgtype_name ( msgtype ), inet_ntoa ( peer->sin_addr ),
902	       ntohs ( peer->sin_port ) );
903	if ( server_id.s_addr != peer->sin_addr.s_addr )
904		DBGC ( dhcp, " (%s)", inet_ntoa ( server_id ) );
905
906	/* Identify boot menu item */
907	dhcppkt_fetch ( dhcppkt, DHCP_PXE_BOOT_MENU_ITEM,
908			&menu_item, sizeof ( menu_item ) );
909	if ( menu_item.type )
910		DBGC ( dhcp, " for type %d", ntohs ( menu_item.type ) );
911	DBGC ( dhcp, "\n" );
912
913	/* Filter out unacceptable responses */
914	if ( ( peer->sin_port != htons ( BOOTPS_PORT ) ) &&
915	     ( peer->sin_port != htons ( PXE_PORT ) ) )
916		return;
917	if ( msgtype != DHCPACK )
918		return;
919	if ( menu_item.type != dhcp->pxe_type )
920		return;
921	if ( ! dhcp_pxebs_accept ( dhcp, ( server_id.s_addr ?
922					   server_id : peer->sin_addr ) ) )
923		return;
924
925	/* Register settings */
926	dhcppkt->settings.name = PXEBS_SETTINGS_NAME;
927	if ( ( rc = register_settings ( &dhcppkt->settings, NULL ) ) != 0 ) {
928		DBGC ( dhcp, "DHCP %p could not register settings: %s\n",
929		       dhcp, strerror ( rc ) );
930		dhcp_finished ( dhcp, rc );
931		return;
932	}
933
934	/* Terminate DHCP */
935	dhcp_finished ( dhcp, 0 );
936}
937
938/**
939 * Handle timer expiry during PXE Boot Server Discovery
940 *
941 * @v dhcp		DHCP session
942 */
943static void dhcp_pxebs_expired ( struct dhcp_session *dhcp ) {
944	unsigned long elapsed = ( currticks() - dhcp->start );
945
946	/* Give up waiting before we reach the failure point, and fail
947	 * over to the next server in the attempt list
948	 */
949	if ( elapsed > PXEBS_MAX_TIMEOUT ) {
950		dhcp->pxe_attempt++;
951		if ( dhcp->pxe_attempt->s_addr ) {
952			dhcp_set_state ( dhcp, &dhcp_state_pxebs );
953			return;
954		} else {
955			dhcp_finished ( dhcp, -ETIMEDOUT );
956			return;
957		}
958	}
959
960	/* Retransmit current packet */
961	dhcp_tx ( dhcp );
962}
963
/**
 * PXE Boot Server Discovery state operations
 *
 * Transmits DHCPREQUEST; the minimum retry timeout is applied when
 * this state is entered (see dhcp_set_state()).
 */
static struct dhcp_session_state dhcp_state_pxebs = {
	.name			= "PXEBS",
	.tx			= dhcp_pxebs_tx,
	.rx			= dhcp_pxebs_rx,
	.expired		= dhcp_pxebs_expired,
	.tx_msgtype		= DHCPREQUEST,
	.apply_min_timeout	= 1,
};
973
974/****************************************************************************
975 *
976 * Packet construction
977 *
978 */
979
980/**
981 * Construct DHCP client hardware address field and broadcast flag
982 *
983 * @v netdev		Network device
984 * @v hlen		DHCP hardware address length to fill in
985 * @v flags		DHCP flags to fill in
986 * @ret chaddr		DHCP client hardware address
987 */
988void * dhcp_chaddr ( struct net_device *netdev, uint8_t *hlen,
989		     uint16_t *flags ) {
990	struct ll_protocol *ll_protocol = netdev->ll_protocol;
991	typeof ( ( ( struct dhcphdr * ) NULL )->chaddr ) chaddr;
992
993	/* If the link-layer address cannot fit into the chaddr field
994	 * (as is the case for IPoIB) then try using the hardware
995	 * address instead.  If we do this, set the broadcast flag,
996	 * since chaddr then does not represent a valid link-layer
997	 * address for the return path.
998	 *
999	 * If even the hardware address is too large, use an empty
1000	 * chaddr field and set the broadcast flag.
1001	 *
1002	 * This goes against RFC4390, but RFC4390 mandates that we use
1003	 * a DHCP client identifier that conforms with RFC4361, which
1004	 * we cannot do without either persistent (NIC-independent)
1005	 * storage, or by eliminating the hardware address completely
1006	 * from the DHCP packet, which seems unfriendly to users.
1007	 */
1008	if ( ( *hlen = ll_protocol->ll_addr_len ) <= sizeof ( chaddr ) ) {
1009		return netdev->ll_addr;
1010	}
1011	*flags = htons ( BOOTP_FL_BROADCAST );
1012	if ( ( *hlen = ll_protocol->hw_addr_len ) <= sizeof ( chaddr ) ) {
1013		return netdev->hw_addr;
1014	} else {
1015		*hlen = 0;
1016		return NULL;
1017	}
1018}
1019
1020/**
1021 * Create a DHCP packet
1022 *
1023 * @v dhcppkt		DHCP packet structure to fill in
1024 * @v netdev		Network device
1025 * @v msgtype		DHCP message type
1026 * @v options		Initial options to include (or NULL)
1027 * @v options_len	Length of initial options
1028 * @v data		Buffer for DHCP packet
1029 * @v max_len		Size of DHCP packet buffer
1030 * @ret rc		Return status code
1031 *
1032 * Creates a DHCP packet in the specified buffer, and initialise a
1033 * DHCP packet structure.
1034 */
1035int dhcp_create_packet ( struct dhcp_packet *dhcppkt,
1036			 struct net_device *netdev, uint8_t msgtype,
1037			 const void *options, size_t options_len,
1038			 void *data, size_t max_len ) {
1039	struct dhcphdr *dhcphdr = data;
1040	void *chaddr;
1041	int rc;
1042
1043	/* Sanity check */
1044	if ( max_len < ( sizeof ( *dhcphdr ) + options_len ) )
1045		return -ENOSPC;
1046
1047	/* Initialise DHCP packet content */
1048	memset ( dhcphdr, 0, max_len );
1049	dhcphdr->xid = dhcp_xid ( netdev );
1050	dhcphdr->magic = htonl ( DHCP_MAGIC_COOKIE );
1051	dhcphdr->htype = ntohs ( netdev->ll_protocol->ll_proto );
1052	dhcphdr->op = dhcp_op[msgtype];
1053	chaddr = dhcp_chaddr ( netdev, &dhcphdr->hlen, &dhcphdr->flags );
1054	memcpy ( dhcphdr->chaddr, chaddr, dhcphdr->hlen );
1055	memcpy ( dhcphdr->options, options, options_len );
1056
1057	/* Initialise DHCP packet structure */
1058	memset ( dhcppkt, 0, sizeof ( *dhcppkt ) );
1059	dhcppkt_init ( dhcppkt, data, max_len );
1060
1061	/* Set DHCP_MESSAGE_TYPE option */
1062	if ( ( rc = dhcppkt_store ( dhcppkt, DHCP_MESSAGE_TYPE,
1063				    &msgtype, sizeof ( msgtype ) ) ) != 0 )
1064		return rc;
1065
1066	return 0;
1067}
1068
1069/**
1070 * Create DHCP request packet
1071 *
1072 * @v dhcppkt		DHCP packet structure to fill in
1073 * @v netdev		Network device
1074 * @v msgtype		DHCP message type
1075 * @v ciaddr		Client IP address
1076 * @v data		Buffer for DHCP packet
1077 * @v max_len		Size of DHCP packet buffer
1078 * @ret rc		Return status code
1079 *
1080 * Creates a DHCP request packet in the specified buffer, and
1081 * initialise a DHCP packet structure.
1082 */
1083int dhcp_create_request ( struct dhcp_packet *dhcppkt,
1084			  struct net_device *netdev, unsigned int msgtype,
1085			  struct in_addr ciaddr, void *data, size_t max_len ) {
1086	struct dhcp_netdev_desc dhcp_desc;
1087	struct dhcp_client_id client_id;
1088	struct dhcp_client_uuid client_uuid;
1089	uint8_t *dhcp_features;
1090	size_t dhcp_features_len;
1091	size_t ll_addr_len;
1092	ssize_t len;
1093	int rc;
1094
1095	/* Create DHCP packet */
1096	if ( ( rc = dhcp_create_packet ( dhcppkt, netdev, msgtype,
1097					 dhcp_request_options_data,
1098					 sizeof ( dhcp_request_options_data ),
1099					 data, max_len ) ) != 0 ) {
1100		DBG ( "DHCP could not create DHCP packet: %s\n",
1101		      strerror ( rc ) );
1102		return rc;
1103	}
1104
1105	/* Set client IP address */
1106	dhcppkt->dhcphdr->ciaddr = ciaddr;
1107
1108	/* Add options to identify the feature list */
1109	dhcp_features = table_start ( DHCP_FEATURES );
1110	dhcp_features_len = table_num_entries ( DHCP_FEATURES );
1111	if ( ( rc = dhcppkt_store ( dhcppkt, DHCP_EB_ENCAP, dhcp_features,
1112				    dhcp_features_len ) ) != 0 ) {
1113		DBG ( "DHCP could not set features list option: %s\n",
1114		      strerror ( rc ) );
1115		return rc;
1116	}
1117
1118	/* Add options to identify the network device */
1119	fetch_setting ( &netdev->settings.settings, &busid_setting, &dhcp_desc,
1120		sizeof ( dhcp_desc ) );
1121	if ( ( rc = dhcppkt_store ( dhcppkt, DHCP_EB_BUS_ID, &dhcp_desc,
1122				    sizeof ( dhcp_desc ) ) ) != 0 ) {
1123		DBG ( "DHCP could not set bus ID option: %s\n",
1124		      strerror ( rc ) );
1125		return rc;
1126	}
1127
1128	/* Add DHCP client identifier.  Required for Infiniband, and
1129	 * doesn't hurt other link layers.
1130	 */
1131	client_id.ll_proto = ntohs ( netdev->ll_protocol->ll_proto );
1132	ll_addr_len = netdev->ll_protocol->ll_addr_len;
1133	assert ( ll_addr_len <= sizeof ( client_id.ll_addr ) );
1134	memcpy ( client_id.ll_addr, netdev->ll_addr, ll_addr_len );
1135	if ( ( rc = dhcppkt_store ( dhcppkt, DHCP_CLIENT_ID, &client_id,
1136				    ( ll_addr_len + 1 ) ) ) != 0 ) {
1137		DBG ( "DHCP could not set client ID: %s\n",
1138		      strerror ( rc ) );
1139		return rc;
1140	}
1141
1142	/* Add client UUID, if we have one.  Required for PXE. */
1143	client_uuid.type = DHCP_CLIENT_UUID_TYPE;
1144	if ( ( len = fetch_uuid_setting ( NULL, &uuid_setting,
1145					  &client_uuid.uuid ) ) >= 0 ) {
1146		if ( ( rc = dhcppkt_store ( dhcppkt, DHCP_CLIENT_UUID,
1147					    &client_uuid,
1148					    sizeof ( client_uuid ) ) ) != 0 ) {
1149			DBG ( "DHCP could not set client UUID: %s\n",
1150			      strerror ( rc ) );
1151			return rc;
1152		}
1153	}
1154
1155	/* Add user class, if we have one. */
1156	if ( ( len = fetch_setting_len ( NULL, &user_class_setting ) ) >= 0 ) {
1157		char user_class[len];
1158		fetch_setting ( NULL, &user_class_setting, user_class,
1159				sizeof ( user_class ) );
1160		if ( ( rc = dhcppkt_store ( dhcppkt, DHCP_USER_CLASS_ID,
1161					    &user_class,
1162					    sizeof ( user_class ) ) ) != 0 ) {
1163			DBG ( "DHCP could not set user class: %s\n",
1164			      strerror ( rc ) );
1165			return rc;
1166		}
1167	}
1168
1169	return 0;
1170}
1171
1172/****************************************************************************
1173 *
1174 * Data transfer interface
1175 *
1176 */
1177
1178/**
1179 * Transmit DHCP request
1180 *
1181 * @v dhcp		DHCP session
1182 * @ret rc		Return status code
1183 */
1184static int dhcp_tx ( struct dhcp_session *dhcp ) {
1185	static struct sockaddr_in peer = {
1186		.sin_family = AF_INET,
1187	};
1188	struct xfer_metadata meta = {
1189		.netdev = dhcp->netdev,
1190		.src = ( struct sockaddr * ) &dhcp->local,
1191		.dest = ( struct sockaddr * ) &peer,
1192	};
1193	struct io_buffer *iobuf;
1194	uint8_t msgtype = dhcp->state->tx_msgtype;
1195	struct dhcp_packet dhcppkt;
1196	int rc;
1197
1198	/* Start retry timer.  Do this first so that failures to
1199	 * transmit will be retried.
1200	 */
1201	start_timer ( &dhcp->timer );
1202
1203	/* Allocate buffer for packet */
1204	iobuf = xfer_alloc_iob ( &dhcp->xfer, DHCP_MIN_LEN );
1205	if ( ! iobuf )
1206		return -ENOMEM;
1207
1208	/* Create basic DHCP packet in temporary buffer */
1209	if ( ( rc = dhcp_create_request ( &dhcppkt, dhcp->netdev, msgtype,
1210					  dhcp->local.sin_addr, iobuf->data,
1211					  iob_tailroom ( iobuf ) ) ) != 0 ) {
1212		DBGC ( dhcp, "DHCP %p could not construct DHCP request: %s\n",
1213		       dhcp, strerror ( rc ) );
1214		goto done;
1215	}
1216
1217	/* Fill in packet based on current state */
1218	if ( ( rc = dhcp->state->tx ( dhcp, &dhcppkt, &peer ) ) != 0 ) {
1219		DBGC ( dhcp, "DHCP %p could not fill DHCP request: %s\n",
1220		       dhcp, strerror ( rc ) );
1221		goto done;
1222	}
1223
1224	/* Transmit the packet */
1225	iob_put ( iobuf, dhcppkt.len );
1226	if ( ( rc = xfer_deliver_iob_meta ( &dhcp->xfer, iob_disown ( iobuf ),
1227					    &meta ) ) != 0 ) {
1228		DBGC ( dhcp, "DHCP %p could not transmit UDP packet: %s\n",
1229		       dhcp, strerror ( rc ) );
1230		goto done;
1231	}
1232
1233 done:
1234	free_iob ( iobuf );
1235	return rc;
1236}
1237
1238/**
1239 * Receive new data
1240 *
1241 * @v xfer 		Data transfer interface
1242 * @v iobuf		I/O buffer
1243 * @v meta		Transfer metadata
1244 * @ret rc		Return status code
1245 */
1246static int dhcp_deliver_iob ( struct xfer_interface *xfer,
1247			      struct io_buffer *iobuf,
1248			      struct xfer_metadata *meta ) {
1249	struct dhcp_session *dhcp =
1250		container_of ( xfer, struct dhcp_session, xfer );
1251	struct sockaddr_in *peer;
1252	size_t data_len;
1253	struct dhcp_packet *dhcppkt;
1254	struct dhcphdr *dhcphdr;
1255	uint8_t msgtype = 0;
1256	struct in_addr server_id = { 0 };
1257	int rc = 0;
1258
1259	/* Sanity checks */
1260	if ( ! meta->src ) {
1261		DBGC ( dhcp, "DHCP %p received packet without source port\n",
1262		       dhcp );
1263		rc = -EINVAL;
1264		goto err_no_src;
1265	}
1266	peer = ( struct sockaddr_in * ) meta->src;
1267
1268	/* Create a DHCP packet containing the I/O buffer contents.
1269	 * Whilst we could just use the original buffer in situ, that
1270	 * would waste the unused space in the packet buffer, and also
1271	 * waste a relatively scarce fully-aligned I/O buffer.
1272	 */
1273	data_len = iob_len ( iobuf );
1274	dhcppkt = zalloc ( sizeof ( *dhcppkt ) + data_len );
1275	if ( ! dhcppkt ) {
1276		rc = -ENOMEM;
1277		goto err_alloc_dhcppkt;
1278	}
1279	dhcphdr = ( ( ( void * ) dhcppkt ) + sizeof ( *dhcppkt ) );
1280	memcpy ( dhcphdr, iobuf->data, data_len );
1281	dhcppkt_init ( dhcppkt, dhcphdr, data_len );
1282
1283	/* Identify message type */
1284	dhcppkt_fetch ( dhcppkt, DHCP_MESSAGE_TYPE, &msgtype,
1285			sizeof ( msgtype ) );
1286
1287	/* Identify server ID */
1288	dhcppkt_fetch ( dhcppkt, DHCP_SERVER_IDENTIFIER,
1289			&server_id, sizeof ( server_id ) );
1290
1291	/* Check for matching transaction ID */
1292	if ( dhcphdr->xid != dhcp_xid ( dhcp->netdev ) ) {
1293		DBGC ( dhcp, "DHCP %p %s from %s:%d has bad transaction "
1294		       "ID\n", dhcp, dhcp_msgtype_name ( msgtype ),
1295		       inet_ntoa ( peer->sin_addr ),
1296		       ntohs ( peer->sin_port ) );
1297		rc = -EINVAL;
1298		goto err_xid;
1299	};
1300
1301	/* Handle packet based on current state */
1302	dhcp->state->rx ( dhcp, dhcppkt, peer, msgtype, server_id );
1303
1304 err_xid:
1305	dhcppkt_put ( dhcppkt );
1306 err_alloc_dhcppkt:
1307 err_no_src:
1308	free_iob ( iobuf );
1309	return rc;
1310}
1311
1312/** DHCP data transfer interface operations */
1313static struct xfer_interface_operations dhcp_xfer_operations = {
1314	.close		= ignore_xfer_close,
1315	.vredirect	= xfer_vreopen,
1316	.window		= unlimited_xfer_window,
1317	.alloc_iob	= default_xfer_alloc_iob,
1318	.deliver_iob	= dhcp_deliver_iob,
1319	.deliver_raw	= xfer_deliver_as_iob,
1320};
1321
1322/**
1323 * Handle DHCP retry timer expiry
1324 *
1325 * @v timer		DHCP retry timer
1326 * @v fail		Failure indicator
1327 */
1328static void dhcp_timer_expired ( struct retry_timer *timer, int fail ) {
1329	struct dhcp_session *dhcp =
1330		container_of ( timer, struct dhcp_session, timer );
1331
1332	/* If we have failed, terminate DHCP */
1333	if ( fail ) {
1334		dhcp_finished ( dhcp, -ETIMEDOUT );
1335		return;
1336	}
1337
1338	/* Handle timer expiry based on current state */
1339	dhcp->state->expired ( dhcp );
1340}
1341
1342/****************************************************************************
1343 *
1344 * Job control interface
1345 *
1346 */
1347
1348/**
1349 * Handle kill() event received via job control interface
1350 *
1351 * @v job		DHCP job control interface
1352 */
1353static void dhcp_job_kill ( struct job_interface *job ) {
1354	struct dhcp_session *dhcp =
1355		container_of ( job, struct dhcp_session, job );
1356
1357	/* Terminate DHCP session */
1358	dhcp_finished ( dhcp, -ECANCELED );
1359}
1360
1361/** DHCP job control interface operations */
1362static struct job_interface_operations dhcp_job_operations = {
1363	.done		= ignore_job_done,
1364	.kill		= dhcp_job_kill,
1365	.progress	= ignore_job_progress,
1366};
1367
1368/****************************************************************************
1369 *
1370 * Instantiators
1371 *
1372 */
1373
1374/**
1375 * DHCP peer address for socket opening
1376 *
1377 * This is a dummy address; the only useful portion is the socket
1378 * family (so that we get a UDP connection).  The DHCP client will set
1379 * the IP address and source port explicitly on each transmission.
1380 */
1381static struct sockaddr dhcp_peer = {
1382	.sa_family = AF_INET,
1383};
1384
1385/**
1386 * Start DHCP state machine on a network device
1387 *
1388 * @v job		Job control interface
1389 * @v netdev		Network device
1390 * @ret rc		Return status code, or positive if cached
1391 *
1392 * Starts DHCP on the specified network device.  If successful, the
1393 * DHCPACK (and ProxyDHCPACK, if applicable) will be registered as
1394 * option sources.
1395 *
1396 * On a return of 0, a background job has been started to perform the
1397 * DHCP request. Any nonzero return means the job has not been
1398 * started; a positive return value indicates the success condition of
1399 * having fetched the appropriate data from cached information.
1400 */
1401int start_dhcp ( struct job_interface *job, struct net_device *netdev ) {
1402	struct dhcp_session *dhcp;
1403	int rc;
1404
1405	/* Check for cached DHCP information */
1406	get_cached_dhcpack();
1407	if ( fetch_uintz_setting ( NULL, &use_cached_setting ) ) {
1408		DBG ( "DHCP using cached network settings\n" );
1409		return 1;
1410	}
1411
1412	/* Allocate and initialise structure */
1413	dhcp = zalloc ( sizeof ( *dhcp ) );
1414	if ( ! dhcp )
1415		return -ENOMEM;
1416	dhcp->refcnt.free = dhcp_free;
1417	job_init ( &dhcp->job, &dhcp_job_operations, &dhcp->refcnt );
1418	xfer_init ( &dhcp->xfer, &dhcp_xfer_operations, &dhcp->refcnt );
1419	dhcp->netdev = netdev_get ( netdev );
1420	dhcp->local.sin_family = AF_INET;
1421	dhcp->local.sin_port = htons ( BOOTPC_PORT );
1422	dhcp->timer.expired = dhcp_timer_expired;
1423
1424	/* Instantiate child objects and attach to our interfaces */
1425	if ( ( rc = xfer_open_socket ( &dhcp->xfer, SOCK_DGRAM, &dhcp_peer,
1426				  ( struct sockaddr * ) &dhcp->local ) ) != 0 )
1427		goto err;
1428
1429	/* Enter DHCPDISCOVER state */
1430	dhcp_set_state ( dhcp, &dhcp_state_discover );
1431
1432	/* Attach parent interface, mortalise self, and return */
1433	job_plug_plug ( &dhcp->job, job );
1434	ref_put ( &dhcp->refcnt );
1435	return 0;
1436
1437 err:
1438	dhcp_finished ( dhcp, rc );
1439	ref_put ( &dhcp->refcnt );
1440	return rc;
1441}
1442
1443/**
1444 * Retrieve list of PXE boot servers for a given server type
1445 *
1446 * @v dhcp		DHCP session
1447 * @v raw		DHCP PXE boot server list
1448 * @v raw_len		Length of DHCP PXE boot server list
1449 * @v ip		IP address list to fill in
1450 *
1451 * The caller must ensure that the IP address list has sufficient
1452 * space.
1453 */
1454static void pxebs_list ( struct dhcp_session *dhcp, void *raw,
1455			 size_t raw_len, struct in_addr *ip ) {
1456	struct dhcp_pxe_boot_server *server = raw;
1457	size_t server_len;
1458	unsigned int i;
1459
1460	while ( raw_len ) {
1461		if ( raw_len < sizeof ( *server ) ) {
1462			DBGC ( dhcp, "DHCP %p malformed PXE server list\n",
1463			       dhcp );
1464			break;
1465		}
1466		server_len = offsetof ( typeof ( *server ),
1467					ip[ server->num_ip ] );
1468		if ( raw_len < server_len ) {
1469			DBGC ( dhcp, "DHCP %p malformed PXE server list\n",
1470			       dhcp );
1471			break;
1472		}
1473		if ( server->type == dhcp->pxe_type ) {
1474			for ( i = 0 ; i < server->num_ip ; i++ )
1475				*(ip++) = server->ip[i];
1476		}
1477		server = ( ( ( void * ) server ) + server_len );
1478		raw_len -= server_len;
1479	}
1480}
1481
1482/**
1483 * Start PXE Boot Server Discovery on a network device
1484 *
1485 * @v job		Job control interface
1486 * @v netdev		Network device
1487 * @v pxe_type		PXE server type
1488 * @ret rc		Return status code
1489 *
1490 * Starts PXE Boot Server Discovery on the specified network device.
1491 * If successful, the Boot Server ACK will be registered as an option
1492 * source.
1493 */
1494int start_pxebs ( struct job_interface *job, struct net_device *netdev,
1495		  unsigned int pxe_type ) {
1496	struct setting pxe_discovery_control_setting =
1497		{ .tag = DHCP_PXE_DISCOVERY_CONTROL };
1498	struct setting pxe_boot_servers_setting =
1499		{ .tag = DHCP_PXE_BOOT_SERVERS };
1500	struct setting pxe_boot_server_mcast_setting =
1501		{ .tag = DHCP_PXE_BOOT_SERVER_MCAST };
1502	ssize_t pxebs_list_len;
1503	struct dhcp_session *dhcp;
1504	struct in_addr *ip;
1505	unsigned int pxe_discovery_control;
1506	int rc;
1507
1508	/* Get upper bound for PXE boot server IP address list */
1509	pxebs_list_len = fetch_setting_len ( NULL, &pxe_boot_servers_setting );
1510	if ( pxebs_list_len < 0 )
1511		pxebs_list_len = 0;
1512
1513	/* Allocate and initialise structure */
1514	dhcp = zalloc ( sizeof ( *dhcp ) + sizeof ( *ip ) /* mcast */ +
1515			sizeof ( *ip ) /* bcast */ + pxebs_list_len +
1516			sizeof ( *ip ) /* terminator */ );
1517	if ( ! dhcp )
1518		return -ENOMEM;
1519	dhcp->refcnt.free = dhcp_free;
1520	job_init ( &dhcp->job, &dhcp_job_operations, &dhcp->refcnt );
1521	xfer_init ( &dhcp->xfer, &dhcp_xfer_operations, &dhcp->refcnt );
1522	dhcp->netdev = netdev_get ( netdev );
1523	dhcp->local.sin_family = AF_INET;
1524	fetch_ipv4_setting ( netdev_settings ( netdev ), &ip_setting,
1525			     &dhcp->local.sin_addr );
1526	dhcp->local.sin_port = htons ( BOOTPC_PORT );
1527	dhcp->pxe_type = cpu_to_le16 ( pxe_type );
1528	dhcp->timer.expired = dhcp_timer_expired;
1529
1530	/* Construct PXE boot server IP address lists */
1531	pxe_discovery_control =
1532		fetch_uintz_setting ( NULL, &pxe_discovery_control_setting );
1533	ip = ( ( ( void * ) dhcp ) + sizeof ( *dhcp ) );
1534	dhcp->pxe_attempt = ip;
1535	if ( ! ( pxe_discovery_control & PXEBS_NO_MULTICAST ) ) {
1536		fetch_ipv4_setting ( NULL, &pxe_boot_server_mcast_setting, ip);
1537		if ( ip->s_addr )
1538			ip++;
1539	}
1540	if ( ! ( pxe_discovery_control & PXEBS_NO_BROADCAST ) )
1541		(ip++)->s_addr = INADDR_BROADCAST;
1542	if ( pxe_discovery_control & PXEBS_NO_UNKNOWN_SERVERS )
1543		dhcp->pxe_accept = ip;
1544	if ( pxebs_list_len ) {
1545		uint8_t buf[pxebs_list_len];
1546
1547		fetch_setting ( NULL, &pxe_boot_servers_setting,
1548				buf, sizeof ( buf ) );
1549		pxebs_list ( dhcp, buf, sizeof ( buf ), ip );
1550	}
1551	if ( ! dhcp->pxe_attempt->s_addr ) {
1552		DBGC ( dhcp, "DHCP %p has no PXE boot servers for type %04x\n",
1553		       dhcp, pxe_type );
1554		rc = -EINVAL;
1555		goto err;
1556	}
1557
1558	/* Dump out PXE server lists */
1559	DBGC ( dhcp, "DHCP %p attempting", dhcp );
1560	for ( ip = dhcp->pxe_attempt ; ip->s_addr ; ip++ )
1561		DBGC ( dhcp, " %s", inet_ntoa ( *ip ) );
1562	DBGC ( dhcp, "\n" );
1563	if ( dhcp->pxe_accept ) {
1564		DBGC ( dhcp, "DHCP %p accepting", dhcp );
1565		for ( ip = dhcp->pxe_accept ; ip->s_addr ; ip++ )
1566			DBGC ( dhcp, " %s", inet_ntoa ( *ip ) );
1567		DBGC ( dhcp, "\n" );
1568	}
1569
1570	/* Instantiate child objects and attach to our interfaces */
1571	if ( ( rc = xfer_open_socket ( &dhcp->xfer, SOCK_DGRAM, &dhcp_peer,
1572				  ( struct sockaddr * ) &dhcp->local ) ) != 0 )
1573		goto err;
1574
1575	/* Enter PXEBS state */
1576	dhcp_set_state ( dhcp, &dhcp_state_pxebs );
1577
1578	/* Attach parent interface, mortalise self, and return */
1579	job_plug_plug ( &dhcp->job, job );
1580	ref_put ( &dhcp->refcnt );
1581	return 0;
1582
1583 err:
1584	dhcp_finished ( dhcp, rc );
1585	ref_put ( &dhcp->refcnt );
1586	return rc;
1587}
1588