root/src/utils/noit_watchdog.c

Revision a269782bb0c65ccb6f6c79bf6ff0541077288f2d, 5.0 kB (checked in by Theo Schlossnagle <jesus@omniti.com>, 4 years ago)

never abort a child before the heart starts... only after the heart has stopped.

  • Property mode set to 100644
Line 
1 /*
2  * Copyright (c) 2007-2009, OmniTI Computer Consulting, Inc.
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions are
7  * met:
8  *
9  *     * Redistributions of source code must retain the above copyright
10  *       notice, this list of conditions and the following disclaimer.
11  *     * Redistributions in binary form must reproduce the above
12  *       copyright notice, this list of conditions and the following
13  *       disclaimer in the documentation and/or other materials provided
14  *       with the distribution.
15  *     * Neither the name OmniTI Computer Consulting, Inc. nor the names
16  *       of its contributors may be used to endorse or promote products
17  *       derived from this software without specific prior written
18  *       permission.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
23  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
24  * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
26  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
30  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31  */
32 #include "noit_defines.h"
33
34 #include <assert.h>
35 #include <stdio.h>
36 #include <stdlib.h>
37 #include <unistd.h>
38 #include <errno.h>
39 #include <sys/ioctl.h>
40 #include <fcntl.h>
41 #include <sys/mman.h>
42 #include <signal.h>
43 #ifdef HAVE_SYS_WAIT_H
44 #include <sys/wait.h>
45 #endif
46
47 #include "eventer/eventer.h"
48 #include "utils/noit_log.h"
49 #include "utils/noit_watchdog.h"
50
/* Fallback watchdog timeout, in seconds, applied when a caller passes 0. */
#define CHILD_WATCHDOG_TIMEOUT 5
52
/* Watchdog stuff */

/*
 * Heartbeat counter.  NULL until noit_watchdog_prefork_init() points it at
 * a MAP_SHARED anonymous mapping, which lets a process and the children it
 * forks afterwards see the same int.
 */
static int *lifeline = NULL;

/*
 * Report how many whole seconds have elapsed since the heartbeat counter
 * was last observed to change.
 *
 * Every call samples *lifeline; when the sample differs from the previous
 * one, the current time is recorded as the moment of the latest beat.
 * Returns 0 for as long as no change has ever been observed, so a child is
 * never reported overdue before its heart has started.
 *
 * The caller must have a valid lifeline mapping (it is dereferenced
 * unconditionally).
 */
static unsigned long last_tick_time() {
  static struct timeval lastchange = { 0, 0 };
  static int lastcheck = 0;
  struct timeval now, diff;

  gettimeofday(&now, NULL);
  if(lastcheck != *lifeline) {
    lastcheck = *lifeline;
    lastchange = now;
  }
  if(lastchange.tv_sec == 0) return 0;

  /* sub_timeval is expected to yield (now - lastchange) in diff. */
  sub_timeval(now, lastchange, &diff);
  if(diff.tv_sec < 0) {
    /* gettimeofday() follows the wall clock, which can be stepped
     * backwards.  Casting the negative difference to unsigned long would
     * produce an enormous "elapsed" value and get a healthy child killed,
     * so restart the measurement from the new time instead. */
    lastchange = now;
    return 0;
  }
  return (unsigned long)diff.tv_sec;
}
70 static void it_ticks_zero() {
71   (*lifeline) = 0;
72 }
73 static void it_ticks() {
74   (*lifeline)++;
75 }
/*
 * Public heartbeat entry point: records a single beat on the shared
 * counter via it_ticks().
 *
 * Returns 0 unconditionally.
 */
int noit_watchdog_child_heartbeat() {
  it_ticks();

  return 0;
}
80 int noit_watchdog_prefork_init() {
81   lifeline = (int *)mmap(NULL, sizeof(int), PROT_READ|PROT_WRITE,
82                          MAP_SHARED|MAP_ANON, -1, 0);
83   if(lifeline == (void *)-1) {
84     noitL(noit_error, "Failed to mmap anon for watchdog\n");
85     return -1;
86   }
87   (*lifeline) = 0;
88   return 0;
89 }
90
91 int noit_watchdog_start_child(const char *app, int (*func)(),
92                               int child_watchdog_timeout) {
93   int child_pid;
94   if(child_watchdog_timeout == 0)
95     child_watchdog_timeout = CHILD_WATCHDOG_TIMEOUT;
96   while(1) {
97     child_pid = fork();
98     if(child_pid == -1) {
99       noitL(noit_error, "fork failed: %s\n", strerror(errno));
100       exit(-1);
101     }
102     if(child_pid == 0) {
103       /* This sets up things so we start alive */
104       it_ticks_zero();
105       /* run the program */
106       exit(func());
107     }
108     else {
109       int sig = -1, exit_val = -1;
110       while(1) {
111         unsigned long ltt;
112         int status, rv;
113         sleep(1); /* Just check child status every second */
114         rv = waitpid(child_pid, &status, WNOHANG);
115         if(rv == 0) {
116           /* Nothing */
117         }
118         else if (rv == child_pid) {
119           /* We died!... we need to relaunch, unless the status was a requested exit (2) */
120           sig = WTERMSIG(status);
121           exit_val = WEXITSTATUS(status);
122           if(sig == SIGINT || sig == SIGQUIT ||
123              (sig == 0 && (exit_val == 2 || exit_val < 0))) {
124             noitL(noit_error, "%s shutdown acknowledged.\n", app);
125             exit(0);
126           }
127           break;
128         }
129         else {
130           noitL(noit_error, "Unexpected return from waitpid: %d\n", rv);
131           exit(-1);
132         }
133         /* Now check out timeout */
134         if((ltt = last_tick_time()) > child_watchdog_timeout) {
135           noitL(noit_error,
136                 "Watchdog timeout (%lu s)... terminating child\n",
137                 ltt);
138           kill(child_pid, SIGKILL);
139         }
140       }
141       noitL(noit_error, "%s child died [%d/%d], restarting.\n",
142             app, exit_val, sig);
143     }
144   }
145 }
146
147 static int watchdog_tick(eventer_t e, int mask, void *unused, struct timeval *now) {
148   it_ticks();
149   return 0;
150 }
151 int noit_watchdog_child_eventer_heartbeat() {
152   eventer_t e;
153
154   assert(__eventer);
155
156   /* Setup our hearbeat */
157   e = eventer_alloc();
158   e->mask = EVENTER_RECURRENT;
159   e->callback = watchdog_tick;
160   eventer_add_recurrent(e);
161
162   return 0;
163 }
164
Note: See TracBrowser for help on using the browser.