Postfix3.3.1
watchdog.c
[詳解]
1 /*++
2 /* NAME
3 /* watchdog 3
4 /* SUMMARY
5 /* watchdog timer
6 /* SYNOPSIS
7 /* #include <watchdog.h>
8 /*
9 /* WATCHDOG *watchdog_create(timeout, action, context)
10 /* unsigned timeout;
11 /* void (*action)(WATCHDOG *watchdog, char *context);
12 /* char *context;
13 /*
14 /* void watchdog_start(watchdog)
15 /* WATCHDOG *watchdog;
16 /*
17 /* void watchdog_stop(watchdog)
18 /* WATCHDOG *watchdog;
19 /*
20 /* void watchdog_destroy(watchdog)
21 /* WATCHDOG *watchdog;
22 /*
23 /* void watchdog_pat()
24 /* DESCRIPTION
25 /* This module implements watchdog timers that are based on ugly
26 /* UNIX alarm timers. The module is designed to survive systems
27 /* with clocks that jump occasionally.
28 /*
29 /* Watchdog timers can be stacked. Only one watchdog timer can be
30 /* active at a time. Only the last created watchdog timer can be
31 /* manipulated. Watchdog timers must be destroyed in reverse order
32 /* of creation.
33 /*
34 /* watchdog_create() suspends the current watchdog timer, if any,
35 /* and instantiates a new watchdog timer.
36 /*
37 /* watchdog_start() starts or restarts the watchdog timer.
38 /*
39 /* watchdog_stop() stops the watchdog timer.
40 /*
41 /* watchdog_destroy() stops the watchdog timer, and resumes the
42 /* watchdog timer instance that was suspended by watchdog_create().
43 /*
44 /* watchdog_pat() pats the watchdog, so it stays quiet.
45 /*
46 /* Arguments:
47 /* .IP timeout
48 /* The watchdog time limit. When the watchdog timer runs, the
49 /* process must invoke watchdog_start(), watchdog_stop() or
50 /* watchdog_destroy() before the time limit is reached.
51 /* .IP action
52 /* A null pointer, or pointer to function that is called when the
53 /* watchdog alarm goes off. The default action is to terminate
54 /* the process with a fatal error.
55 /* .IP context
56 /* Application context that is passed to the action routine.
57 /* .IP watchdog
58 /* Must be a pointer to the most recently created watchdog instance.
59 /* This argument is checked upon each call.
60 /* BUGS
61 /* UNIX alarm timers are not stackable, so there can be at most one
62 /* watchdog instance active at any given time.
63 /* SEE ALSO
64 /* msg(3) diagnostics interface
65 /* DIAGNOSTICS
66 /* Fatal errors: memory allocation problem, system call failure.
67 /* Panics: interface violations.
68 /* LICENSE
69 /* .ad
70 /* .fi
71 /* The Secure Mailer license must be distributed with this software.
72 /* AUTHOR(S)
73 /* Wietse Venema
74 /* IBM T.J. Watson Research
75 /* P.O. Box 704
76 /* Yorktown Heights, NY 10598, USA
77 /*--*/
78 
79 /* System library. */
80 
81 #include <sys_defs.h>
82 #include <unistd.h>
83 #include <signal.h>
84 #include <posix_signals.h>
85 
86 /* Utility library. */
87 
88 #include <msg.h>
89 #include <mymalloc.h>
90 #include <killme_after.h>
91 #include <watchdog.h>
92 
93 /* Application-specific. */
94 
95  /*
96  * Rather than having one timer that goes off when it is too late, we break
97  * up the time limit into smaller intervals so that we can deal with clocks
98  * that jump occasionally.
99  */
#define WATCHDOG_STEPS  3               /* alarm intervals per time limit */
101 
102  /*
103  * UNIX alarms are not stackable, but we can save and restore state, so that
104  * watchdogs can at least be nested, sort of.
105  */
106 struct WATCHDOG {
107  unsigned timeout; /* our time resolution */
108  WATCHDOG_FN action; /* application routine */
109  char *context; /* application context */
110  int trip_run; /* number of successive timeouts */
111  WATCHDOG *saved_watchdog; /* saved state */
112  struct sigaction saved_action; /* saved state */
113  unsigned saved_time; /* saved state */
114 };
115 
116  /*
117  * However, only one watchdog instance can be current, and the caller has to
118  * restore state before a prior watchdog instance can be manipulated.
119  */
120 static WATCHDOG *watchdog_curr;
121 
122  /*
123  * Workaround for systems where the alarm signal does not wakeup the event
124  * machinery, and therefore does not restart the watchdog timer in the
125  * single_server etc. skeletons. The symptom is that programs abort when the
126  * watchdog timeout is less than the max_idle time.
127  */
128 #ifdef USE_WATCHDOG_PIPE
129 #include <errno.h>
130 #include <iostuff.h>
131 #include <events.h>
132 
static int watchdog_pipe[2];            /* [0] is read from, [1] is written to */

/* watchdog_read - drain pending wakeup bytes from the watchdog pipe */

static void watchdog_read(int unused_event, void *unused_context)
{
    char    sink;

    /*
     * Discard one byte at a time until read() no longer delivers data
     * (empty pipe, end-of-file, or error).
     */
    for (;;) {
        if (read(watchdog_pipe[0], &sink, 1) <= 0)
            break;
    }
}
144 
145 #endif /* USE_WATCHDOG_PIPE */
146 
147 /* watchdog_event - handle timeout event */
148 
149 static void watchdog_event(int unused_sig)
150 {
151  const char *myname = "watchdog_event";
152  WATCHDOG *wp;
153 
154  /*
155  * This routine runs as a signal handler. We should not do anything that
156  * could involve memory allocation/deallocation, but exiting without
157  * proper explanation would be unacceptable. For this reason, msg(3) was
158  * made safe for usage by signal handlers that terminate the process.
159  */
160  if ((wp = watchdog_curr) == 0)
161  msg_panic("%s: no instance", myname);
162  if (msg_verbose > 1)
163  msg_info("%s: %p %d", myname, (void *) wp, wp->trip_run);
164  if (++(wp->trip_run) < WATCHDOG_STEPS) {
165 #ifdef USE_WATCHDOG_PIPE
166  int saved_errno = errno;
167 
168  /* Wake up the events(3) engine. */
169  if (write(watchdog_pipe[1], "", 1) != 1)
170  msg_warn("%s: write watchdog_pipe: %m", myname);
171  errno = saved_errno;
172 #endif
173  alarm(wp->timeout);
174  } else {
175  if (wp->action)
176  wp->action(wp, wp->context);
177  else {
178  killme_after(5);
179 #ifdef TEST
180  pause();
181 #endif
182  msg_fatal("watchdog timeout");
183  }
184  }
185 }
186 
187 /* watchdog_create - create watchdog instance */
188 
189 WATCHDOG *watchdog_create(unsigned timeout, WATCHDOG_FN action, char *context)
190 {
191  const char *myname = "watchdog_create";
192  struct sigaction sig_action;
193  WATCHDOG *wp;
194 
195  wp = (WATCHDOG *) mymalloc(sizeof(*wp));
196  if ((wp->timeout = timeout / WATCHDOG_STEPS) == 0)
197  msg_panic("%s: timeout %d is too small", myname, timeout);
198  wp->action = action;
199  wp->context = context;
200  wp->saved_watchdog = watchdog_curr;
201  wp->saved_time = alarm(0);
202  sigemptyset(&sig_action.sa_mask);
203 #ifdef SA_RESTART
204  sig_action.sa_flags = SA_RESTART;
205 #else
206  sig_action.sa_flags = 0;
207 #endif
208  sig_action.sa_handler = watchdog_event;
209  if (sigaction(SIGALRM, &sig_action, &wp->saved_action) < 0)
210  msg_fatal("%s: sigaction(SIGALRM): %m", myname);
211  if (msg_verbose > 1)
212  msg_info("%s: %p %d", myname, (void *) wp, timeout);
213 #ifdef USE_WATCHDOG_PIPE
214  if (watchdog_curr == 0) {
215  if (pipe(watchdog_pipe) < 0)
216  msg_fatal("%s: pipe: %m", myname);
217  non_blocking(watchdog_pipe[0], NON_BLOCKING);
218  non_blocking(watchdog_pipe[1], NON_BLOCKING);
219  event_enable_read(watchdog_pipe[0], watchdog_read, (void *) 0);
220  }
221 #endif
222  return (watchdog_curr = wp);
223 }
224 
225 /* watchdog_destroy - destroy watchdog instance, restore state */
226 
228 {
229  const char *myname = "watchdog_destroy";
230 
231  watchdog_stop(wp);
232  watchdog_curr = wp->saved_watchdog;
233  if (sigaction(SIGALRM, &wp->saved_action, (struct sigaction *) 0) < 0)
234  msg_fatal("%s: sigaction(SIGALRM): %m", myname);
235  if (wp->saved_time)
236  alarm(wp->saved_time);
237  myfree((void *) wp);
238 #ifdef USE_WATCHDOG_PIPE
239  if (watchdog_curr == 0) {
240  event_disable_readwrite(watchdog_pipe[0]);
241  (void) close(watchdog_pipe[0]);
242  (void) close(watchdog_pipe[1]);
243  }
244 #endif
245  if (msg_verbose > 1)
246  msg_info("%s: %p", myname, (void *) wp);
247 }
248 
249 /* watchdog_start - enable watchdog timer */
250 
252 {
253  const char *myname = "watchdog_start";
254 
255  if (wp != watchdog_curr)
256  msg_panic("%s: wrong watchdog instance", myname);
257  wp->trip_run = 0;
258  alarm(wp->timeout);
259  if (msg_verbose > 1)
260  msg_info("%s: %p", myname, (void *) wp);
261 }
262 
263 /* watchdog_stop - disable watchdog timer */
264 
266 {
267  const char *myname = "watchdog_stop";
268 
269  if (wp != watchdog_curr)
270  msg_panic("%s: wrong watchdog instance", myname);
271  alarm(0);
272  if (msg_verbose > 1)
273  msg_info("%s: %p", myname, (void *) wp);
274 }
275 
276 /* watchdog_pat - pat the dog so it stays quiet */
277 
278 void watchdog_pat(void)
279 {
280  const char *myname = "watchdog_pat";
281 
282  if (watchdog_curr)
283  watchdog_curr->trip_run = 0;
284  if (msg_verbose > 1)
285  msg_info("%s: %p", myname, (void *) watchdog_curr);
286 }
287 
288 #ifdef TEST
289 
290 #include <vstream.h>
291 
292 int main(int unused_argc, char **unused_argv)
293 {
294  WATCHDOG *wp;
295 
296  msg_verbose = 2;
297 
298  wp = watchdog_create(10, (WATCHDOG_FN) 0, (void *) 0);
299  watchdog_start(wp);
300  do {
301  watchdog_pat();
302  } while (VSTREAM_GETCHAR() != VSTREAM_EOF);
303  watchdog_destroy(wp);
304  return (0);
305 }
306 
307 #endif
int msg_verbose
Definition: msg.c:177
void event_enable_read(int fd, EVENT_NOTIFY_RDWR_FN callback, void *context)
Definition: events.c:729
#define VSTREAM_EOF
Definition: vstream.h:110
void myfree(void *ptr)
Definition: mymalloc.c:207
WATCHDOG * saved_watchdog
Definition: watchdog.c:111
void watchdog_pat(void)
Definition: watchdog.c:278
NORETURN msg_panic(const char *fmt,...)
Definition: msg.c:295
void watchdog_destroy(WATCHDOG *wp)
Definition: watchdog.c:227
#define WATCHDOG_STEPS
Definition: watchdog.c:100
int main(int argc, char **argv)
Definition: anvil.c:1010
char * context
Definition: watchdog.c:109
void watchdog_start(WATCHDOG *wp)
Definition: watchdog.c:251
void(* WATCHDOG_FN)(WATCHDOG *, char *)
Definition: watchdog.h:18
int trip_run
Definition: watchdog.c:110
void killme_after(unsigned int seconds)
Definition: killme_after.c:41
#define VSTREAM_GETCHAR()
Definition: vstream.h:113
WATCHDOG * watchdog_create(unsigned timeout, WATCHDOG_FN action, char *context)
Definition: watchdog.c:189
void msg_warn(const char *fmt,...)
Definition: msg.c:215
unsigned timeout
Definition: watchdog.c:107
NORETURN msg_fatal(const char *fmt,...)
Definition: msg.c:249
void watchdog_stop(WATCHDOG *wp)
Definition: watchdog.c:265
#define NON_BLOCKING
Definition: iostuff.h:49
int non_blocking(int, int)
Definition: non_blocking.c:55
struct sigaction saved_action
Definition: watchdog.c:112
WATCHDOG_FN action
Definition: watchdog.c:108
unsigned saved_time
Definition: watchdog.c:113
void event_disable_readwrite(int fd)
Definition: events.c:839
void * mymalloc(ssize_t len)
Definition: mymalloc.c:150
void msg_info(const char *fmt,...)
Definition: msg.c:199