1// Copyright (c) 2011 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#ifndef NET_TOOLS_FLIP_SERVER_EPOLL_SERVER_H_
6#define NET_TOOLS_FLIP_SERVER_EPOLL_SERVER_H_
7
8#include <fcntl.h>
9#include <sys/queue.h>
10#include <map>
11#include <string>
12#include <utility>
13#include <set>
14#include <vector>
15
16// #define EPOLL_SERVER_EVENT_TRACING 1
17//
18// Defining EPOLL_SERVER_EVENT_TRACING
19// causes code to exist which didn't before.
20// This code tracks each event generated by the epollserver,
21// as well as providing a per-fd-registered summary of
22// events. Note that enabling this code vastly slows
23// down operations, and uses substantially more
24// memory. For these reasons, it should only be enabled when doing
25// developer debugging at his/her workstation.
26//
27// A structure called 'EventRecorder' will exist when
28// the macro is defined. See the EventRecorder class interface
29// within the EpollServer class for more details.
30#ifdef EPOLL_SERVER_EVENT_TRACING
31#include <ostream>
32#include "base/logging.h"
33#endif
34
35#include "base/basictypes.h"
36#include "base/compiler_specific.h"
37#include "base/containers/hash_tables.h"
38#include "base/memory/scoped_ptr.h"
39#include <sys/epoll.h>
40
41namespace net {
42
43class EpollServer;
44class EpollAlarmCallbackInterface;
45class ReadPipeCallback;
46
// Event descriptor handed to EpollCallbackInterface::OnEvent().
//
// in_events carries the event bits being delivered to the callback.
// out_ready_mask starts at 0; a non-zero value asks for the fd to be placed
// on the ready list with that mask.
struct EpollEvent {
  // Args:
  //   events - the incoming event mask, stored in in_events.
  //   is_epoll_wait - accepted so that existing callers keep compiling, but
  //                   not stored anywhere. The parameter is left unnamed so
  //                   that including this header does not trigger
  //                   unused-parameter warnings.
  EpollEvent(int events, bool /* is_epoll_wait */)
      : in_events(events),
        out_ready_mask(0) {
  }

  int in_events;            // incoming events
  int out_ready_mask;       // the new event mask for ready list (0 means don't
                            // get on the ready list). This field is always
                            // initialized to 0 when the event is passed to
                            // OnEvent.
};
59
60// Callbacks which go into EpollServers are expected to derive from this class.
61class EpollCallbackInterface {
62 public:
63  // Summary:
64  //   Called when the callback is registered into a EpollServer.
65  // Args:
66  //   eps - the poll server into which this callback was registered
67  //   fd - the file descriptor which was registered
68  //   event_mask - the event mask (composed of EPOLLIN, EPOLLOUT, etc)
69  //                which was registered (and will initially be used
70  //                in the epoll() calls)
71  virtual void OnRegistration(EpollServer* eps, int fd, int event_mask) = 0;
72
73  // Summary:
74  //   Called when the event_mask is modified (for a file-descriptor)
75  // Args:
76  //   fd - the file descriptor which was registered
77  //   event_mask - the event mask (composed of EPOLLIN, EPOLLOUT, etc)
78  //                which was is now curren (and will be used
79  //                in subsequent epoll() calls)
80  virtual void OnModification(int fd, int event_mask) = 0;
81
82  // Summary:
83  //   Called whenever an event occurs on the file-descriptor.
84  //   This is where the bulk of processing is expected to occur.
85  // Args:
86  //   fd - the file descriptor which was registered
87  //   event - a struct that contains the event mask (composed of EPOLLIN,
88  //           EPOLLOUT, etc), a flag that indicates whether this is a true
89  //           epoll_wait event vs one from the ready list, and an output
90  //           parameter for OnEvent to inform the EpollServer whether to put
91  //           this fd on the ready list.
92  virtual void OnEvent(int fd, EpollEvent* event) = 0;
93
94  // Summary:
95  //   Called when the file-descriptor is unregistered from the poll-server.
96  // Args:
97  //   fd - the file descriptor which was registered, and of this call, is now
98  //        unregistered.
99  //   replaced - If true, this callback is being replaced by another, otherwise
100  //              it is simply being removed.
101  virtual void OnUnregistration(int fd, bool replaced) = 0;
102
103  // Summary:
104  //   Called when the epoll server is shutting down.  This is different from
105  //   OnUnregistration because the subclass may want to clean up memory.
106  //   This is called in leiu of OnUnregistration.
107  // Args:
108  //  fd - the file descriptor which was registered.
109  virtual void OnShutdown(EpollServer* eps, int fd) = 0;
110
111  virtual ~EpollCallbackInterface() {}
112
113 protected:
114  EpollCallbackInterface() {}
115};
116
117////////////////////////////////////////////////////////////////////////////////
118////////////////////////////////////////////////////////////////////////////////
119
120class EpollServer {
121 public:
122  typedef EpollAlarmCallbackInterface AlarmCB;
123  typedef EpollCallbackInterface CB;
124
125  typedef std::multimap<int64, AlarmCB*> TimeToAlarmCBMap;
126  typedef TimeToAlarmCBMap::iterator AlarmRegToken;
127
128  // Summary:
129  //   Constructor:
130  //    By default, we don't wait any amount of time for events, and
131  //    we suggest to the epoll-system that we're going to use on-the-order
132  //    of 1024 FDs.
133  EpollServer();
134
135  ////////////////////////////////////////
136
137  // Destructor
138  virtual ~EpollServer();
139
140  ////////////////////////////////////////
141
142  // Summary
143  //   Register a callback to be called whenever an event contained
144  //   in the set of events included in event_mask occurs on the
145  //   file-descriptor 'fd'
146  //
147  //   Note that only one callback is allowed to be registered for
  //   any specific file-descriptor.
149  //
150  //   If a callback is registered for a file-descriptor which has already
151  //   been registered, then the previous callback is unregistered with
152  //   the 'replaced' flag set to true. I.e. the previous callback's
153  //   OnUnregistration() function is called like so:
154  //      OnUnregistration(fd, true);
155  //
156  //  The epoll server does NOT take on ownership of the callback: the callback
157  //  creator is responsible for managing that memory.
158  //
159  // Args:
160  //   fd - a valid file-descriptor
161  //   cb - an instance of a subclass of EpollCallbackInterface
162  //   event_mask - a combination of (EPOLLOUT, EPOLLIN.. etc) indicating
163  //                the events for which the callback would like to be
164  //                called.
165  virtual void RegisterFD(int fd, CB* cb, int event_mask);
166
167  ////////////////////////////////////////
168
169  // Summary:
170  //   A shortcut for RegisterFD which sets things up such that the
171  //   callback is called when 'fd' is available for writing.
172  // Args:
173  //   fd - a valid file-descriptor
174  //   cb - an instance of a subclass of EpollCallbackInterface
175  virtual void RegisterFDForWrite(int fd, CB* cb);
176
177  ////////////////////////////////////////
178
179  // Summary:
180  //   A shortcut for RegisterFD which sets things up such that the
181  //   callback is called when 'fd' is available for reading or writing.
182  // Args:
183  //   fd - a valid file-descriptor
184  //   cb - an instance of a subclass of EpollCallbackInterface
185  virtual void RegisterFDForReadWrite(int fd, CB* cb);
186
187  ////////////////////////////////////////
188
189  // Summary:
190  //   A shortcut for RegisterFD which sets things up such that the
191  //   callback is called when 'fd' is available for reading.
192  // Args:
193  //   fd - a valid file-descriptor
194  //   cb - an instance of a subclass of EpollCallbackInterface
195  virtual void RegisterFDForRead(int fd, CB* cb);
196
197  ////////////////////////////////////////
198
199  // Summary:
200  //   Removes the FD and the associated callback from the pollserver.
201  //   If the callback is registered with other FDs, they will continue
202  //   to be processed using the callback without modification.
203  //   If the file-descriptor specified is not registered in the
204  //   epoll_server, then nothing happens as a result of this call.
205  // Args:
206  //   fd - the file-descriptor which should no-longer be monitored.
207  virtual void UnregisterFD(int fd);
208
209  ////////////////////////////////////////
210
211  // Summary:
212  //   Modifies the event mask for the file-descriptor, replacing
213  //   the old event_mask with the new one specified here.
214  //   If the file-descriptor specified is not registered in the
215  //   epoll_server, then nothing happens as a result of this call.
216  // Args:
217  //   fd - the fd whose event mask should be modified.
218  //   event_mask - the new event mask.
219  virtual void ModifyCallback(int fd, int event_mask);
220
221  ////////////////////////////////////////
222
223  // Summary:
224  //   Modifies the event mask for the file-descriptor such that we
225  //   no longer request events when 'fd' is readable.
226  //   If the file-descriptor specified is not registered in the
227  //   epoll_server, then nothing happens as a result of this call.
228  // Args:
229  //   fd - the fd whose event mask should be modified.
230  virtual void StopRead(int fd);
231
232  ////////////////////////////////////////
233
234  // Summary:
235  //   Modifies the event mask for the file-descriptor such that we
236  //   request events when 'fd' is readable.
237  //   If the file-descriptor specified is not registered in the
238  //   epoll_server, then nothing happens as a result of this call.
239  // Args:
240  //   fd - the fd whose event mask should be modified.
241  virtual void StartRead(int fd);
242
243  ////////////////////////////////////////
244
245  // Summary:
246  //   Modifies the event mask for the file-descriptor such that we
247  //   no longer request events when 'fd' is writable.
248  //   If the file-descriptor specified is not registered in the
249  //   epoll_server, then nothing happens as a result of this call.
250  // Args:
251  //   fd - the fd whose event mask should be modified.
252  virtual void StopWrite(int fd);
253
254  ////////////////////////////////////////
255
256  // Summary:
257  //   Modifies the event mask for the file-descriptor such that we
258  //   request events when 'fd' is writable.
259  //   If the file-descriptor specified is not registered in the
260  //   epoll_server, then nothing happens as a result of this call.
261  // Args:
262  //   fd - the fd whose event mask should be modified.
263  virtual void StartWrite(int fd);
264
265  ////////////////////////////////////////
266
267  // Summary:
  //   Looks up the callback associated with the file-descriptor 'fd'.
  //   If a callback is associated with this file-descriptor, then
  //   its OnEvent() method is called with the file-descriptor 'fd',
  //   and event_mask 'event_mask'
272  //
273  //   If no callback is registered for this file-descriptor, nothing
274  //   will happen as a result of this call.
275  //
  //   This function is used internally by the EpollServer, but is
  //   available publicly so that events might be 'faked'. Calling
278  //   this function with an fd and event_mask is equivalent (as far
279  //   as the callback is concerned) to having a real event generated
280  //   by epoll (except, of course, that read(), etc won't necessarily
281  //   be able to read anything)
282  // Args:
  //   fd - the file-descriptor on which an event has occurred.
  //   event_mask - a bitmask representing the events which have occurred
285  //                on/for this fd. This bitmask is composed of
286  //                POLLIN, POLLOUT, etc.
287  //
288  void HandleEvent(int fd, int event_mask);
289
290  // Summary:
291  //   Call this when you want the pollserver to
292  //   wait for events and execute the callbacks associated with
  //   the file-descriptors on which those events have occurred.
  //   Depending on the value of timeout_in_us_, this may or may
  //   not return immediately. Please reference the set_timeout_in_us()
  //   function for the specific behaviour.
297  virtual void WaitForEventsAndExecuteCallbacks();
298
299  // Summary:
300  //   When an fd is registered to use edge trigger notification, the ready
301  //   list can be used to simulate level trigger semantics. Edge trigger
302  //   registration doesn't send an initial event, and only rising edge (going
303  //   from blocked to unblocked) events are sent. A callback can put itself on
304  //   the ready list by calling SetFDReady() after calling RegisterFD(). The
305  //   OnEvent method of all callbacks associated with the fds on the ready
306  //   list will be called immediately after processing the events returned by
307  //   epoll_wait(). The fd is removed from the ready list before the
308  //   callback's OnEvent() method is invoked. To stay on the ready list, the
309  //   OnEvent() (or some function in that call chain) must call SetFDReady
310  //   again. When a fd is unregistered using UnregisterFD(), the fd is
311  //   automatically removed from the ready list.
312  //
313  //   When the callback for a edge triggered fd hits the falling edge (about
314  //   to block, either because of it got an EAGAIN, or had a short read/write
315  //   operation), it should remove itself from the ready list using
316  //   SetFDNotReady() (since OnEvent cannot distinguish between invocation
317  //   from the ready list vs from a normal epoll event). All four ready list
318  //   methods are safe to be called  within the context of the callbacks.
319  //
320  //   Since the ready list invokes EpollCallbackInterface::OnEvent, only fds
321  //   that are registered with the EpollServer will be put on the ready list.
322  //   SetFDReady() and SetFDNotReady() will do nothing if the EpollServer
323  //   doesn't know about the fd passed in.
324  //
325  //   Since the ready list cannot reliably determine proper set of events
326  //   which should be sent to the callback, SetFDReady() requests the caller
327  //   to provide the ready list with the event mask, which will be used later
328  //   when OnEvent() is invoked by the ready list. Hence, the event_mask
  //   passed to SetFDReady() does not affect the actual epoll registration of
330  //   the fd with the kernel. If a fd is already put on the ready list, and
331  //   SetFDReady() is called again for that fd with a different event_mask,
332  //   the event_mask will be updated.
333  virtual void SetFDReady(int fd, int events_to_fake);
334
335  virtual void SetFDNotReady(int fd);
336
337  // Summary:
338  //   IsFDReady(), ReadyListSize(), and VerifyReadyList are intended as
339  //   debugging tools and for writing unit tests.
  //   IsFDReady() returns whether a fd is in the ready list.
341  //   ReadyListSize() returns the number of fds on the ready list.
342  //   VerifyReadyList() checks the consistency of internal data structure. It
343  //   will CHECK if it finds an error.
344  virtual bool IsFDReady(int fd) const;
345
346  size_t ReadyListSize() const { return ready_list_size_; }
347
348  void VerifyReadyList() const;
349
350  ////////////////////////////////////////
351
352  // Summary:
353  //   Registers an alarm 'ac' to go off at time 'timeout_time_in_us'.
354  //   If the callback returns a positive number from its OnAlarm() function,
355  //   then the callback will be re-registered at that time, else the alarm
356  //   owner is responsible for freeing up memory.
357  //
  //   Important: A given AlarmCB* can not be registered again if it is already
359  //    registered. If a user wants to register a callback again it should first
360  //    unregister the previous callback before calling RegisterAlarm again.
361  // Args:
362  //   timeout_time_in_us - the absolute time at which the alarm should go off
363  //   ac - the alarm which will be called.
364  virtual void RegisterAlarm(int64 timeout_time_in_us, AlarmCB* ac);
365
366  // Summary:
  //   Registers an alarm 'ac' to go off at time: (ApproximateNowInUsec() +
  //   delta_in_us). While this is somewhat less accurate (see the description
  //   for ApproximateNowInUsec() to see how 'approximate'), the error is never
370  //   worse than the amount of time it takes to process all events in one
371  //   WaitForEvents.  As with 'RegisterAlarm()', if the callback returns a
372  //   positive number from its OnAlarm() function, then the callback will be
373  //   re-registered at that time, else the alarm owner is responsible for
374  //   freeing up memory.
  //   Note that this function is purely a convenience. The
  //   same thing may be accomplished by using RegisterAlarm with
  //   ApproximateNowInUsec() directly.
378  //
  //   Important: A given AlarmCB* can not be registered again if it is already
380  //    registered. If a user wants to register a callback again it should first
381  //    unregister the previous callback before calling RegisterAlarm again.
382  // Args:
  //   delta_in_us - the delta in microseconds from ApproximateNowInUsec() at
  //                 which point the alarm should go off.
385  //   ac - the alarm which will be called.
386  void RegisterAlarmApproximateDelta(int64 delta_in_us, AlarmCB* ac) {
387    RegisterAlarm(ApproximateNowInUsec() + delta_in_us, ac);
388  }
389
390  ////////////////////////////////////////
391
392  // Summary:
  //   Unregisters the alarm referred to by iterator_token. Callers should
  //   be warned that a token may already be invalid once OnAlarm() has been
  //   called, the alarm was unregistered, or OnShutdown was called on it.
396  // Args:
397  //    iterator_token - iterator to the alarm callback to unregister.
398  virtual void UnregisterAlarm(
399      const EpollServer::AlarmRegToken& iterator_token);
400
401  ////////////////////////////////////////
402
403  // Summary:
404  //   returns the number of file-descriptors registered in this EpollServer.
405  // Returns:
406  //   number of FDs registered (discounting the internal pipe used for Wake)
407  virtual int NumFDsRegistered() const;
408
409  // Summary:
410  //   Force the epoll server to wake up (by writing to an internal pipe).
411  virtual void Wake();
412
413  // Summary:
414  //   Wrapper around WallTimer's NowInUsec.  We do this so that we can test
415  //   EpollServer without using the system clock (and can avoid the flakiness
416  //   that would ensue)
417  // Returns:
418  //   the current time as number of microseconds since the Unix epoch.
419  virtual int64 NowInUsec() const;
420
421  // Summary:
422  //   Since calling NowInUsec() many thousands of times per
423  //   WaitForEventsAndExecuteCallbacks function call is, to say the least,
424  //   inefficient, we allow users to use an approximate time instead. The
425  //   time returned from this function is as accurate as NowInUsec() when
426  //   WaitForEventsAndExecuteCallbacks is not an ancestor of the caller's
427  //   callstack.
428  //   However, when WaitForEventsAndExecuteCallbacks -is- an ancestor, then
429  //   this function returns the time at which the
430  //   WaitForEventsAndExecuteCallbacks function started to process events or
431  //   alarms.
432  //
433  //   Essentially, this function makes available a fast and mostly accurate
434  //   mechanism for getting the time for any function handling an event or
435  //   alarm. When functions which are not handling callbacks or alarms call
436  //   this function, they get the slow and "absolutely" accurate time.
437  //
438  //   Users should be encouraged to use this function.
439  // Returns:
440  //   the "approximate" current time as number of microseconds since the Unix
441  //   epoch.
442  virtual int64 ApproximateNowInUsec() const;
443
444  static std::string EventMaskToString(int event_mask);
445
446  // Summary:
447  //   Logs the state of the epoll server with LOG(ERROR).
448  void LogStateOnCrash();
449
450  // Summary:
451  //   Set the timeout to the value specified.
  //   If the timeout is set to a negative number,
  //      WaitForEventsAndExecuteCallbacks() will only return when an event has
  //      occurred
  //   If the timeout is set to zero,
  //      WaitForEventsAndExecuteCallbacks() will return immediately
  //   If the timeout is set to a positive number,
  //      WaitForEventsAndExecuteCallbacks() will return when an event has
  //      occurred, or when timeout_in_us microseconds has elapsed, whichever
  //      is first.
461  //  Args:
462  //    timeout_in_us - value specified depending on behaviour desired.
463  //                    See above.
464  void set_timeout_in_us(int64 timeout_in_us) {
465    timeout_in_us_ = timeout_in_us;
466  }
467
468  ////////////////////////////////////////
469
470  // Summary:
471  //   Accessor for the current value of timeout_in_us.
472  int timeout_in_us() const { return timeout_in_us_; }
473
474  // Summary:
475  // Returns true when the EpollServer() is being destroyed.
476  bool in_shutdown() const { return in_shutdown_; }
477
478  bool ContainsAlarm(EpollAlarmCallbackInterface* alarm) const {
479    return all_alarms_.find(alarm) != all_alarms_.end();
480  }
481
482  // Summary:
483  //   A function for implementing the ready list. It invokes OnEvent for each
484  //   of the fd in the ready list, and takes care of adding them back to the
485  //   ready list if the callback requests it (by checking that out_ready_mask
486  //   is non-zero).
487  void CallReadyListCallbacks();
488
489 protected:
490  virtual int GetFlags(int fd);
491  inline int SetFlags(int fd, int flags) {
492    return fcntl(fd, F_SETFL, flags | O_NONBLOCK);
493  }
494
495  virtual void SetNonblocking(int fd);
496
497  // This exists here so that we can override this function in unittests
498  // in order to make effective mock EpollServer objects.
499  virtual int epoll_wait_impl(int epfd,
500                              struct epoll_event* events,
501                              int max_events,
502                              int timeout_in_ms);
503
504  // this struct is used internally, and is never used by anything external
505  // to this class. Some of its members are declared mutable to get around the
506  // restriction imposed by hash_set. Since hash_set knows nothing about the
507  // objects it stores, it has to assume that every bit of the object is used
508  // in the hash function and equal_to comparison. Thus hash_set::iterator is a
509  // const iterator. In this case, the only thing that must stay constant is
510  // fd. Everything else are just along for the ride and changing them doesn't
511  // compromise the hash_set integrity.
512  struct CBAndEventMask {
513    CBAndEventMask()
514        : cb(NULL),
515          fd(-1),
516          event_mask(0),
517          events_asserted(0),
518          events_to_fake(0),
519          in_use(false) {
520      entry.le_next = NULL;
521      entry.le_prev = NULL;
522    }
523
524    CBAndEventMask(EpollCallbackInterface* cb,
525                   int event_mask,
526                   int fd)
527        : cb(cb), fd(fd), event_mask(event_mask), events_asserted(0),
528          events_to_fake(0), in_use(false) {
529      entry.le_next = NULL;
530      entry.le_prev = NULL;
531    }
532
533    // Required operator for hash_set. Normally operator== should be a free
534    // standing function. However, since CBAndEventMask is a protected type and
535    // it will never be a base class, it makes no difference.
536    bool operator==(const CBAndEventMask& cb_and_mask) const {
537      return fd == cb_and_mask.fd;
538    }
539    // A callback. If the fd is unregistered inside the callchain of OnEvent,
540    // the cb will be set to NULL.
541    mutable EpollCallbackInterface* cb;
542
543    mutable LIST_ENTRY(CBAndEventMask) entry;
544    // file descriptor registered with the epoll server.
545    int fd;
546    // the current event_mask registered for this callback.
547    mutable int event_mask;
548    // the event_mask that was returned by epoll
549    mutable int events_asserted;
550    // the event_mask for the ready list to use to call OnEvent.
551    mutable int events_to_fake;
552    // toggle around calls to OnEvent to tell UnregisterFD to not erase the
553    // iterator because HandleEvent is using it.
554    mutable bool in_use;
555  };
556
557  // Custom hash function to be used by hash_set.
558  struct CBAndEventMaskHash {
559    size_t operator()(const CBAndEventMask& cb_and_eventmask) const {
560      return static_cast<size_t>(cb_and_eventmask.fd);
561    }
562  };
563
564  typedef base::hash_set<CBAndEventMask, CBAndEventMaskHash> FDToCBMap;
565
566  // the following four functions are OS-specific, and are likely
567  // to be changed in a subclass if the poll/select method is changed
568  // from epoll.
569
570  // Summary:
571  //   Deletes a file-descriptor from the set of FDs that should be
572  //   monitored with epoll.
573  //   Note that this only deals with modifying data relating -directly-
574  //   with the epoll call-- it does not modify any data within the
575  //   epoll_server.
576  // Args:
577  //   fd - the file descriptor to-be-removed from the monitoring set
578  virtual void DelFD(int fd) const;
579
580  ////////////////////////////////////////
581
582  // Summary:
583  //   Adds a file-descriptor to the set of FDs that should be
584  //   monitored with epoll.
585  //   Note that this only deals with modifying data relating -directly-
586  //   with the epoll call.
587  // Args:
588  //   fd - the file descriptor to-be-added to the monitoring set
589  //   event_mask - the event mask (consisting of EPOLLIN, EPOLLOUT, etc
590  //                 OR'd together) which will be associated with this
591  //                 FD initially.
592  virtual void AddFD(int fd, int event_mask) const;
593
594  ////////////////////////////////////////
595
596  // Summary:
597  //   Modifies a file-descriptor in the set of FDs that should be
598  //   monitored with epoll.
599  //   Note that this only deals with modifying data relating -directly-
600  //   with the epoll call.
601  // Args:
602  //   fd - the file descriptor to-be-added to the monitoring set
603  //   event_mask - the event mask (consisting of EPOLLIN, EPOLLOUT, etc
604  //                 OR'd together) which will be associated with this
605  //                 FD after this call.
606  virtual void ModFD(int fd, int event_mask) const;
607
608  ////////////////////////////////////////
609
610  // Summary:
  //   Modifies the event mask associated with an FD in the set of
612  //   data needed by epoll.
613  //   Events are removed before they are added, thus, if ~0 is put
614  //   in 'remove_event', whatever is put in 'add_event' will be
615  //   the new event mask.
616  //   If the file-descriptor specified is not registered in the
617  //   epoll_server, then nothing happens as a result of this call.
618  // Args:
619  //   fd - the file descriptor whose event mask is to be modified
620  //   remove_event - the events which are to be removed from the current
621  //                  event_mask
622  //   add_event - the events which are to be added to the current event_mask
623  //
624  //
625  virtual void ModifyFD(int fd, int remove_event, int add_event);
626
627  ////////////////////////////////////////
628
629  // Summary:
  //   Waits for events, and calls HandleEvent() for each
631  //   fd, event pair discovered to possibly have an event.
632  //   Note that a callback (B) may get a spurious event if
633  //   another callback (A) has closed a file-descriptor N, and
634  //   the callback (B) has a newly opened file-descriptor, which
635  //   also happens to be N.
636  virtual void WaitForEventsAndCallHandleEvents(int64 timeout_in_us,
637                                                struct epoll_event events[],
638                                                int events_size);
639
640
641
642  // Summary:
643  //   An internal function for implementing the ready list. It adds a fd's
644  //   CBAndEventMask to the ready list. If the fd is already on the ready
645  //   list, it is a no-op.
646  void AddToReadyList(CBAndEventMask* cb_and_mask);
647
648  // Summary:
  //   An internal function for implementing the ready list. It removes a fd's
650  //   CBAndEventMask from the ready list. If the fd is not on the ready list,
651  //   it is a no-op.
652  void RemoveFromReadyList(const CBAndEventMask& cb_and_mask);
653
654  // Summary:
655  // Calls any pending alarms that should go off and reregisters them if they
656  // were recurring.
657  virtual void CallAndReregisterAlarmEvents();
658
659  // The file-descriptor created for epolling
660  int epoll_fd_;
661
662  // The mapping of file-descriptor to CBAndEventMasks
663  FDToCBMap cb_map_;
664
665  // Custom hash function to be used by hash_set.
666  struct AlarmCBHash {
667    size_t operator()(AlarmCB*const& p) const {
668      return reinterpret_cast<size_t>(p);
669    }
670  };
671
672
  // TODO(sushantj): Having this hash_set is avoidable. We currently have it
674  // only so that we can enforce stringent checks that a caller can not register
675  // the same alarm twice. One option is to have an implementation in which
676  // this hash_set is used only in the debug mode.
677  typedef base::hash_set<AlarmCB*, AlarmCBHash> AlarmCBMap;
678  AlarmCBMap all_alarms_;
679
680  TimeToAlarmCBMap alarm_map_;
681
682  // The amount of time in microseconds that we'll wait before returning
683  // from the WaitForEventsAndExecuteCallbacks() function.
684  // If this is positive, wait that many microseconds.
685  // If this is negative, wait forever, or for the first event that occurs
686  // If this is zero, never wait for an event.
687  int64 timeout_in_us_;
688
689  // This is nonzero only after the invocation of epoll_wait_impl within
690  // WaitForEventsAndCallHandleEvents and before the function
691  // WaitForEventsAndExecuteCallbacks returns.  At all other times, this is
692  // zero. This enables us to have relatively accurate time returned from the
  // ApproximateNowInUsec() function. See that function for more details.
694  int64 recorded_now_in_us_;
695
696  // This is used to implement CallAndReregisterAlarmEvents. This stores
697  // all alarms that were reregistered because OnAlarm() returned a
  // value > 0 and the time at which they should be executed is less than
699  // the current time.  By storing such alarms in this map we ensure
700  // that while calling CallAndReregisterAlarmEvents we do not call
701  // OnAlarm on any alarm in this set. This ensures that we do not
702  // go in an infinite loop.
703  AlarmCBMap alarms_reregistered_and_should_be_skipped_;
704
705  LIST_HEAD(ReadyList, CBAndEventMask) ready_list_;
706  LIST_HEAD(TmpList, CBAndEventMask) tmp_list_;
707  int ready_list_size_;
708  // TODO(alyssar): make this into something that scales up.
709  static const int events_size_ = 256;
710  struct epoll_event events_[256];
711
712#ifdef EPOLL_SERVER_EVENT_TRACING
713  struct EventRecorder {
714   public:
715    EventRecorder() : num_records_(0), record_threshold_(10000) {}
716
717    ~EventRecorder() {
718      Clear();
719    }
720
721    // When a number of events equals the record threshold,
722    // the collected data summary for all FDs will be written
723    // to LOG(INFO). Note that this does not include the
    // individual events (if you're interested in those, you'll
725    // have to get at them programmatically).
726    // After any such flushing to LOG(INFO) all events will
727    // be cleared.
728    // Note that the definition of an 'event' is a bit 'hazy',
729    // as it includes the 'Unregistration' event, and perhaps
730    // others.
731    void set_record_threshold(int64 new_threshold) {
732      record_threshold_ = new_threshold;
733    }
734
735    void Clear() {
736      for (int i = 0; i < debug_events_.size(); ++i) {
737        delete debug_events_[i];
738      }
739      debug_events_.clear();
740      unregistered_fds_.clear();
741      event_counts_.clear();
742    }
743
744    void MaybeRecordAndClear() {
745      ++num_records_;
746      if ((num_records_ > record_threshold_) &&
747          (record_threshold_ > 0)) {
748        LOG(INFO) << "\n" << *this;
749        num_records_ = 0;
750        Clear();
751      }
752    }
753
754    void RecordFDMaskEvent(int fd, int mask, const char* function) {
755      FDMaskOutput* fdmo = new FDMaskOutput(fd, mask, function);
756      debug_events_.push_back(fdmo);
757      MaybeRecordAndClear();
758    }
759
760    void RecordEpollWaitEvent(int timeout_in_ms,
761                              int num_events_generated) {
762      EpollWaitOutput* ewo = new EpollWaitOutput(timeout_in_ms,
763                                                  num_events_generated);
764      debug_events_.push_back(ewo);
765      MaybeRecordAndClear();
766    }
767
768    void RecordEpollEvent(int fd, int event_mask) {
769      Events& events_for_fd = event_counts_[fd];
770      events_for_fd.AssignFromMask(event_mask);
771      MaybeRecordAndClear();
772    }
773
774    friend ostream& operator<<(ostream& os, const EventRecorder& er) {
775      for (int i = 0; i < er.unregistered_fds_.size(); ++i) {
776        os << "fd: " << er.unregistered_fds_[i] << "\n";
777        os << er.unregistered_fds_[i];
778      }
779      for (EventCountsMap::const_iterator i = er.event_counts_.begin();
780           i != er.event_counts_.end();
781           ++i) {
782        os << "fd: " << i->first << "\n";
783        os << i->second;
784      }
785      for (int i = 0; i < er.debug_events_.size(); ++i) {
786        os << *(er.debug_events_[i]) << "\n";
787      }
788      return os;
789    }
790
791    void RecordUnregistration(int fd) {
792      EventCountsMap::iterator i = event_counts_.find(fd);
793      if (i != event_counts_.end()) {
794        unregistered_fds_.push_back(i->second);
795        event_counts_.erase(i);
796      }
797      MaybeRecordAndClear();
798    }
799
800   protected:
801    class DebugOutput {
802     public:
803      friend ostream& operator<<(ostream& os, const DebugOutput& debug_output) {
804        debug_output.OutputToStream(os);
805        return os;
806      }
807      virtual void OutputToStream(ostream* os) const = 0;
808      virtual ~DebugOutput() {}
809    };
810
811    class FDMaskOutput : public DebugOutput {
812     public:
813      FDMaskOutput(int fd, int mask, const char* function) :
814          fd_(fd), mask_(mask), function_(function) {}
815      virtual void OutputToStream(ostream* os) const {
816        (*os) << "func: " << function_
817              << "\tfd: " << fd_;
818        if (mask_ != 0) {
819           (*os) << "\tmask: " << EventMaskToString(mask_);
820        }
821      }
822      int fd_;
823      int mask_;
824      const char* function_;
825    };
826
827    class EpollWaitOutput : public DebugOutput {
828     public:
829      EpollWaitOutput(int timeout_in_ms,
830                      int num_events_generated) :
831          timeout_in_ms_(timeout_in_ms),
832          num_events_generated_(num_events_generated) {}
833      virtual void OutputToStream(ostream* os) const {
834        (*os) << "timeout_in_ms: " << timeout_in_ms_
835              << "\tnum_events_generated: " << num_events_generated_;
836      }
837     protected:
838      int timeout_in_ms_;
839      int num_events_generated_;
840    };
841
842    struct Events {
843      Events() :
844          epoll_in(0),
845          epoll_pri(0),
846          epoll_out(0),
847          epoll_rdnorm(0),
848          epoll_rdband(0),
849          epoll_wrnorm(0),
850          epoll_wrband(0),
851          epoll_msg(0),
852          epoll_err(0),
853          epoll_hup(0),
854          epoll_oneshot(0),
855          epoll_et(0) {}
856
857      void AssignFromMask(int event_mask) {
858        if (event_mask & EPOLLIN) ++epoll_in;
859        if (event_mask & EPOLLPRI) ++epoll_pri;
860        if (event_mask & EPOLLOUT) ++epoll_out;
861        if (event_mask & EPOLLRDNORM) ++epoll_rdnorm;
862        if (event_mask & EPOLLRDBAND) ++epoll_rdband;
863        if (event_mask & EPOLLWRNORM) ++epoll_wrnorm;
864        if (event_mask & EPOLLWRBAND) ++epoll_wrband;
865        if (event_mask & EPOLLMSG) ++epoll_msg;
866        if (event_mask & EPOLLERR) ++epoll_err;
867        if (event_mask & EPOLLHUP) ++epoll_hup;
868        if (event_mask & EPOLLONESHOT) ++epoll_oneshot;
869        if (event_mask & EPOLLET) ++epoll_et;
870      };
871
872      friend ostream& operator<<(ostream& os, const Events& ev) {
873        if (ev.epoll_in) {
874          os << "\t      EPOLLIN: " << ev.epoll_in << "\n";
875        }
876        if (ev.epoll_pri) {
877          os << "\t     EPOLLPRI: " << ev.epoll_pri << "\n";
878        }
879        if (ev.epoll_out) {
880          os << "\t     EPOLLOUT: " << ev.epoll_out << "\n";
881        }
882        if (ev.epoll_rdnorm) {
883          os << "\t  EPOLLRDNORM: " << ev.epoll_rdnorm << "\n";
884        }
885        if (ev.epoll_rdband) {
886          os << "\t  EPOLLRDBAND: " << ev.epoll_rdband << "\n";
887        }
888        if (ev.epoll_wrnorm) {
889          os << "\t  EPOLLWRNORM: " << ev.epoll_wrnorm << "\n";
890        }
891        if (ev.epoll_wrband) {
892          os << "\t  EPOLLWRBAND: " << ev.epoll_wrband << "\n";
893        }
894        if (ev.epoll_msg) {
895          os << "\t     EPOLLMSG: " << ev.epoll_msg << "\n";
896        }
897        if (ev.epoll_err) {
898          os << "\t     EPOLLERR: " << ev.epoll_err << "\n";
899        }
900        if (ev.epoll_hup) {
901          os << "\t     EPOLLHUP: " << ev.epoll_hup << "\n";
902        }
903        if (ev.epoll_oneshot) {
904          os << "\t EPOLLONESHOT: " << ev.epoll_oneshot << "\n";
905        }
906        if (ev.epoll_et) {
907          os << "\t      EPOLLET: " << ev.epoll_et << "\n";
908        }
909        return os;
910      }
911
912      unsigned int epoll_in;
913      unsigned int epoll_pri;
914      unsigned int epoll_out;
915      unsigned int epoll_rdnorm;
916      unsigned int epoll_rdband;
917      unsigned int epoll_wrnorm;
918      unsigned int epoll_wrband;
919      unsigned int epoll_msg;
920      unsigned int epoll_err;
921      unsigned int epoll_hup;
922      unsigned int epoll_oneshot;
923      unsigned int epoll_et;
924    };
925
926    std::vector<DebugOutput*> debug_events_;
927    std::vector<Events> unregistered_fds_;
928    typedef base::hash_map<int, Events> EventCountsMap;
929    EventCountsMap event_counts_;
930    int64 num_records_;
931    int64 record_threshold_;
932  };
933
934  void ClearEventRecords() {
935    event_recorder_.Clear();
936  }
937  void WriteEventRecords(ostream* os) const {
938    (*os) << event_recorder_;
939  }
940
941  mutable EventRecorder event_recorder_;
942
943#endif
944
945 private:
946  // Helper functions used in the destructor.
947  void CleanupFDToCBMap();
948  void CleanupTimeToAlarmCBMap();
949
950  // The callback registered to the fds below.  As the purpose of their
951  // registration is to wake the epoll server it just clears the pipe and
952  // returns.
953  scoped_ptr<ReadPipeCallback> wake_cb_;
954
955  // A pipe owned by the epoll server.  The server will be registered to listen
956  // on read_fd_ and can be woken by Wake() which writes to write_fd_.
957  int read_fd_;
958  int write_fd_;
959
  // This boolean is checked at the top of the
  // WaitForEventsAndExecuteCallbacks function. If it is already true, the
  // function either logs to ERROR and returns without doing work, or aborts
  // the program (in DEBUG mode). Otherwise it sets the bool to true, does
  // work, and sets it back to false when done. This catches unwanted
  // recursion.
965  bool in_wait_for_events_and_execute_callbacks_;
966
  // True while the EpollServer is being destroyed.
968  bool in_shutdown_;
969
970  DISALLOW_COPY_AND_ASSIGN(EpollServer);
971};
972
973class EpollAlarmCallbackInterface {
974 public:
975  // Summary:
976  //   Called when an alarm times out. Invalidates an AlarmRegToken.
977  //   WARNING: If a token was saved to refer to an alarm callback, OnAlarm must
978  //   delete it, as the reference is no longer valid.
979  // Returns:
980  //   the unix time (in microseconds) at which this alarm should be signaled
981  //   again, or 0 if the alarm should be removed.
982  virtual int64 OnAlarm() = 0;
983
984  // Summary:
985  //   Called when the an alarm is registered. Invalidates an AlarmRegToken.
986  // Args:
987  //   token: the iterator to the the alarm registered in the alarm map.
988  //   WARNING: this token becomes invalid when the alarm fires, is
989  //   unregistered, or OnShutdown is called on that alarm.
990  //   eps: the epoll server the alarm is registered with.
991  virtual void OnRegistration(const EpollServer::AlarmRegToken& token,
992                              EpollServer* eps) = 0;
993
994  // Summary:
995  //   Called when the an alarm is unregistered.
996  //   WARNING: It is not valid to unregister a callback and then use the token
997  //   that was saved to refer to the callback.
998  virtual void OnUnregistration() = 0;
999
1000  // Summary:
1001  //   Called when the epoll server is shutting down.
1002  //   Invalidates the AlarmRegToken that was given when this alarm was
1003  //   registered.
1004  virtual void OnShutdown(EpollServer* eps) = 0;
1005
1006  virtual ~EpollAlarmCallbackInterface() {}
1007
1008 protected:
1009  EpollAlarmCallbackInterface() {}
1010};
1011
1012// A simple alarm which unregisters itself on destruction.
1013//
1014// PLEASE NOTE:
// Any classes overriding these functions must either call the implementation
// of the parent class, or must otherwise make sure that the 'registered_'
// boolean and the token, 'token_', are updated appropriately.
1018class EpollAlarm : public EpollAlarmCallbackInterface {
1019 public:
1020  EpollAlarm();
1021
1022  virtual ~EpollAlarm();
1023
1024  // Marks the alarm as unregistered and returns 0.  The return value may be
1025  // safely ignored by subclasses.
1026  virtual int64 OnAlarm() OVERRIDE;
1027
1028  // Marks the alarm as registered, and stores the token.
1029  virtual void OnRegistration(const EpollServer::AlarmRegToken& token,
1030                              EpollServer* eps) OVERRIDE;
1031
1032  // Marks the alarm as unregistered.
1033  virtual void OnUnregistration() OVERRIDE;
1034
1035  // Marks the alarm as unregistered.
1036  virtual void OnShutdown(EpollServer* eps) OVERRIDE;
1037
1038  // If the alarm was registered, unregister it.
1039  void UnregisterIfRegistered();
1040
1041  bool registered() const { return registered_; }
1042
1043  const EpollServer* eps() const { return eps_; }
1044
1045 private:
1046  EpollServer::AlarmRegToken token_;
1047  EpollServer* eps_;
1048  bool registered_;
1049};
1050
1051}  // namespace net
1052
1053#endif  // NET_TOOLS_FLIP_SERVER_EPOLL_SERVER_H_
1054
1055