1/*	$OpenBSD: poll.c,v 1.2 2002/06/25 15:50:15 mickey Exp $	*/
2
3/*
4 * Copyright 2000-2003 Niels Provos <provos@citi.umich.edu>
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 *    notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 *    notice, this list of conditions and the following disclaimer in the
14 *    documentation and/or other materials provided with the distribution.
15 * 3. The name of the author may not be used to endorse or promote products
16 *    derived from this software without specific prior written permission.
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
19 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
20 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
21 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
22 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
23 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
27 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 */
29#ifdef HAVE_CONFIG_H
30#include "config.h"
31#endif
32
#include <sys/types.h>
#ifdef HAVE_SYS_TIME_H
#include <sys/time.h>
#else
#include <sys/_libevent_time.h>
#endif
#include <sys/queue.h>
#include <poll.h>
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <errno.h>
#include <limits.h>
#ifdef CHECK_INVARIANTS
#include <assert.h>
#endif
50
51#include "event.h"
52#include "event-internal.h"
53#include "evsignal.h"
54#include "log.h"
55
/*
 * State of the poll backend.
 *
 * event_set, event_r_back and event_w_back are parallel arrays: slot i of
 * the two back-pointer arrays holds the events registered for reading and
 * for writing on event_set[i].fd.
 */
struct pollop {
	int event_count;	/* Allocated capacity, in slots, of event_set,
				 * event_r_back and event_w_back. */
	int nfds;		/* Number of slots currently in use. */
	int fd_count;		/* Number of entries in idxplus1_by_fd. */
	struct pollfd *event_set;	/* Array handed to poll(). */
	struct event **event_r_back;	/* Per slot: event waiting to read. */
	struct event **event_w_back;	/* Per slot: event waiting to write. */
	int *idxplus1_by_fd;	/* Indexed by fd: slot index in event_set
				 * plus one.  The offset lets 0 (which is
				 * easy to memset) mean "no entry". */
};
67
/* Entry points of the poll backend; all of them are file-local. */
static void *poll_init(struct event_base *base);
static int poll_add(void *arg, struct event *ev);
static int poll_del(void *arg, struct event *ev);
static int poll_dispatch(struct event_base *base, void *arg,
    struct timeval *tv);
static void poll_dealloc(struct event_base *base, void *arg);
73
74const struct eventop pollops = {
75	"poll",
76	poll_init,
77	poll_add,
78	poll_del,
79	poll_dispatch,
80	poll_dealloc,
81    0
82};
83
84static void *
85poll_init(struct event_base *base)
86{
87	struct pollop *pollop;
88
89	/* Disable poll when this environment variable is set */
90	if (evutil_getenv("EVENT_NOPOLL"))
91		return (NULL);
92
93	if (!(pollop = calloc(1, sizeof(struct pollop))))
94		return (NULL);
95
96	evsignal_init(base);
97
98	return (pollop);
99}
100
#ifdef CHECK_INVARIANTS
/*
 * Assert that the fd index and the slot arrays of `pop` agree with each
 * other.  Compiled only when CHECK_INVARIANTS is defined; otherwise
 * poll_check_ok() expands to nothing.
 */
static void
poll_check_ok(struct pollop *pop)
{
	int fd, slot;

	/* Each fd with an index entry must own a slot that mirrors it. */
	for (fd = 0; fd < pop->fd_count; ++fd) {
		struct pollfd *pfd;
		struct event *ev;

		slot = pop->idxplus1_by_fd[fd] - 1;
		if (slot < 0)
			continue;

		pfd = &pop->event_set[slot];
		assert(pfd->fd == fd);
		if (pfd->events & POLLIN) {
			ev = pop->event_r_back[slot];
			assert(ev);
			assert(ev->ev_events & EV_READ);
			assert(ev->ev_fd == fd);
		}
		if (pfd->events & POLLOUT) {
			ev = pop->event_w_back[slot];
			assert(ev);
			assert(ev->ev_events & EV_WRITE);
			assert(ev->ev_fd == fd);
		}
	}

	/* Each slot in use must be reachable through the index of its fd. */
	for (slot = 0; slot < pop->nfds; ++slot) {
		int owner = pop->event_set[slot].fd;

		assert(pop->idxplus1_by_fd[owner] == slot + 1);
	}
}
#else
#define poll_check_ok(pop)
#endif
134
135static int
136poll_dispatch(struct event_base *base, void *arg, struct timeval *tv)
137{
138	int res, i, j, msec = -1, nfds;
139	struct pollop *pop = arg;
140
141	poll_check_ok(pop);
142
143	if (tv != NULL)
144		msec = tv->tv_sec * 1000 + (tv->tv_usec + 999) / 1000;
145
146	nfds = pop->nfds;
147	res = poll(pop->event_set, nfds, msec);
148
149	if (res == -1) {
150		if (errno != EINTR) {
151                        event_warn("poll");
152			return (-1);
153		}
154
155		evsignal_process(base);
156		return (0);
157	} else if (base->sig.evsignal_caught) {
158		evsignal_process(base);
159	}
160
161	event_debug(("%s: poll reports %d", __func__, res));
162
163	if (res == 0 || nfds == 0)
164		return (0);
165
166	i = random() % nfds;
167	for (j = 0; j < nfds; j++) {
168		struct event *r_ev = NULL, *w_ev = NULL;
169		int what;
170		if (++i == nfds)
171			i = 0;
172		what = pop->event_set[i].revents;
173
174		if (!what)
175			continue;
176
177		res = 0;
178
179		/* If the file gets closed notify */
180		if (what & (POLLHUP|POLLERR))
181			what |= POLLIN|POLLOUT;
182		if (what & POLLIN) {
183			res |= EV_READ;
184			r_ev = pop->event_r_back[i];
185		}
186		if (what & POLLOUT) {
187			res |= EV_WRITE;
188			w_ev = pop->event_w_back[i];
189		}
190		if (res == 0)
191			continue;
192
193		if (r_ev && (res & r_ev->ev_events)) {
194			event_active(r_ev, res & r_ev->ev_events, 1);
195		}
196		if (w_ev && w_ev != r_ev && (res & w_ev->ev_events)) {
197			event_active(w_ev, res & w_ev->ev_events, 1);
198		}
199	}
200
201	return (0);
202}
203
204static int
205poll_add(void *arg, struct event *ev)
206{
207	struct pollop *pop = arg;
208	struct pollfd *pfd = NULL;
209	int i;
210
211	if (ev->ev_events & EV_SIGNAL)
212		return (evsignal_add(ev));
213	if (!(ev->ev_events & (EV_READ|EV_WRITE)))
214		return (0);
215
216	poll_check_ok(pop);
217	if (pop->nfds + 1 >= pop->event_count) {
218		struct pollfd *tmp_event_set;
219		struct event **tmp_event_r_back;
220		struct event **tmp_event_w_back;
221		int tmp_event_count;
222
223		if (pop->event_count < 32)
224			tmp_event_count = 32;
225		else
226			tmp_event_count = pop->event_count * 2;
227
228		/* We need more file descriptors */
229		tmp_event_set = realloc(pop->event_set,
230				 tmp_event_count * sizeof(struct pollfd));
231		if (tmp_event_set == NULL) {
232			event_warn("realloc");
233			return (-1);
234		}
235		pop->event_set = tmp_event_set;
236
237		tmp_event_r_back = realloc(pop->event_r_back,
238			    tmp_event_count * sizeof(struct event *));
239		if (tmp_event_r_back == NULL) {
240			/* event_set overallocated; that's okay. */
241			event_warn("realloc");
242			return (-1);
243		}
244		pop->event_r_back = tmp_event_r_back;
245
246		tmp_event_w_back = realloc(pop->event_w_back,
247			    tmp_event_count * sizeof(struct event *));
248		if (tmp_event_w_back == NULL) {
249			/* event_set and event_r_back overallocated; that's
250			 * okay. */
251			event_warn("realloc");
252			return (-1);
253		}
254		pop->event_w_back = tmp_event_w_back;
255
256		pop->event_count = tmp_event_count;
257	}
258	if (ev->ev_fd >= pop->fd_count) {
259		int *tmp_idxplus1_by_fd;
260		int new_count;
261		if (pop->fd_count < 32)
262			new_count = 32;
263		else
264			new_count = pop->fd_count * 2;
265		while (new_count <= ev->ev_fd)
266			new_count *= 2;
267		tmp_idxplus1_by_fd =
268			realloc(pop->idxplus1_by_fd, new_count * sizeof(int));
269		if (tmp_idxplus1_by_fd == NULL) {
270			event_warn("realloc");
271			return (-1);
272		}
273		pop->idxplus1_by_fd = tmp_idxplus1_by_fd;
274		memset(pop->idxplus1_by_fd + pop->fd_count,
275		       0, sizeof(int)*(new_count - pop->fd_count));
276		pop->fd_count = new_count;
277	}
278
279	i = pop->idxplus1_by_fd[ev->ev_fd] - 1;
280	if (i >= 0) {
281		pfd = &pop->event_set[i];
282	} else {
283		i = pop->nfds++;
284		pfd = &pop->event_set[i];
285		pfd->events = 0;
286		pfd->fd = ev->ev_fd;
287		pop->event_w_back[i] = pop->event_r_back[i] = NULL;
288		pop->idxplus1_by_fd[ev->ev_fd] = i + 1;
289	}
290
291	pfd->revents = 0;
292	if (ev->ev_events & EV_WRITE) {
293		pfd->events |= POLLOUT;
294		pop->event_w_back[i] = ev;
295	}
296	if (ev->ev_events & EV_READ) {
297		pfd->events |= POLLIN;
298		pop->event_r_back[i] = ev;
299	}
300	poll_check_ok(pop);
301
302	return (0);
303}
304
305/*
306 * Nothing to be done here.
307 */
308
309static int
310poll_del(void *arg, struct event *ev)
311{
312	struct pollop *pop = arg;
313	struct pollfd *pfd = NULL;
314	int i;
315
316	if (ev->ev_events & EV_SIGNAL)
317		return (evsignal_del(ev));
318
319	if (!(ev->ev_events & (EV_READ|EV_WRITE)))
320		return (0);
321
322	poll_check_ok(pop);
323	i = pop->idxplus1_by_fd[ev->ev_fd] - 1;
324	if (i < 0)
325		return (-1);
326
327	/* Do we still want to read or write? */
328	pfd = &pop->event_set[i];
329	if (ev->ev_events & EV_READ) {
330		pfd->events &= ~POLLIN;
331		pop->event_r_back[i] = NULL;
332	}
333	if (ev->ev_events & EV_WRITE) {
334		pfd->events &= ~POLLOUT;
335		pop->event_w_back[i] = NULL;
336	}
337	poll_check_ok(pop);
338	if (pfd->events)
339		/* Another event cares about that fd. */
340		return (0);
341
342	/* Okay, so we aren't interested in that fd anymore. */
343	pop->idxplus1_by_fd[ev->ev_fd] = 0;
344
345	--pop->nfds;
346	if (i != pop->nfds) {
347		/*
348		 * Shift the last pollfd down into the now-unoccupied
349		 * position.
350		 */
351		memcpy(&pop->event_set[i], &pop->event_set[pop->nfds],
352		       sizeof(struct pollfd));
353		pop->event_r_back[i] = pop->event_r_back[pop->nfds];
354		pop->event_w_back[i] = pop->event_w_back[pop->nfds];
355		pop->idxplus1_by_fd[pop->event_set[i].fd] = i + 1;
356	}
357
358	poll_check_ok(pop);
359	return (0);
360}
361
362static void
363poll_dealloc(struct event_base *base, void *arg)
364{
365	struct pollop *pop = arg;
366
367	evsignal_dealloc(base);
368	if (pop->event_set)
369		free(pop->event_set);
370	if (pop->event_r_back)
371		free(pop->event_r_back);
372	if (pop->event_w_back)
373		free(pop->event_w_back);
374	if (pop->idxplus1_by_fd)
375		free(pop->idxplus1_by_fd);
376
377	memset(pop, 0, sizeof(struct pollop));
378	free(pop);
379}
380