root/src/utils/noit_watchdog.c

Revision e932bb8dcbb4a14c3e548a2408045fe9ea296a1b, 4.9 kB (checked in by Theo Schlossnagle <jesus@omniti.com>, 4 years ago)

Only respect the heartbeat once it has started.

  • Property mode set to 100644
Line 
1 /*
2  * Copyright (c) 2007-2009, OmniTI Computer Consulting, Inc.
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions are
7  * met:
8  *
9  *     * Redistributions of source code must retain the above copyright
10  *       notice, this list of conditions and the following disclaimer.
11  *     * Redistributions in binary form must reproduce the above
12  *       copyright notice, this list of conditions and the following
13  *       disclaimer in the documentation and/or other materials provided
14  *       with the distribution.
15  *     * Neither the name OmniTI Computer Consulting, Inc. nor the names
16  *       of its contributors may be used to endorse or promote products
17  *       derived from this software without specific prior written
18  *       permission.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
23  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
24  * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
26  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
30  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31  */
32 #include "noit_defines.h"
33
34 #include <assert.h>
35 #include <stdio.h>
36 #include <stdlib.h>
37 #include <unistd.h>
38 #include <errno.h>
39 #include <sys/ioctl.h>
40 #include <fcntl.h>
41 #include <sys/mman.h>
42 #include <signal.h>
43 #ifdef HAVE_SYS_WAIT_H
44 #include <sys/wait.h>
45 #endif
46
47 #include "eventer/eventer.h"
48 #include "utils/noit_log.h"
49 #include "utils/noit_watchdog.h"
50
/* Default number of seconds a child may go without a heartbeat before the
 * parent kills it; used when the caller asks for a timeout of 0. */
#define CHILD_WATCHDOG_TIMEOUT 5

/* Watchdog state */

/* Heartbeat counter.  The child increments it and the parent polls it for
 * changes.  It is NULL until noit_watchdog_prefork_init() stores the address
 * of a shared anonymous mapping here. */
static int *lifeline = NULL;
55 static unsigned long last_tick_time() {
56   static struct timeval lastchange = { 0, 0 };
57   static int lastcheck = 0;
58   struct timeval now, diff;
59
60   gettimeofday(&now, NULL);
61   if(lastcheck != *lifeline) {
62     lastcheck = *lifeline;
63     memcpy(&lastchange, &now, sizeof(lastchange));
64   }
65   if(lastchange.tv_sec == 0) return 0;
66
67   sub_timeval(now, lastchange, &diff);
68   return (unsigned long)diff.tv_sec;
69 }
70 static void it_ticks() {
71   (*lifeline)++;
72 }
/*
 * Public heartbeat entry point: records a single tick of the watchdog
 * counter on behalf of the caller.  Always returns 0.
 */
int noit_watchdog_child_heartbeat()
{
  it_ticks();

  return 0;
}
77 int noit_watchdog_prefork_init() {
78   lifeline = (int *)mmap(NULL, sizeof(int), PROT_READ|PROT_WRITE,
79                          MAP_SHARED|MAP_ANON, -1, 0);
80   if(lifeline == (void *)-1) {
81     noitL(noit_error, "Failed to mmap anon for watchdog\n");
82     return -1;
83   }
84   return 0;
85 }
86
87 int noit_watchdog_start_child(const char *app, int (*func)(),
88                               int child_watchdog_timeout) {
89   int child_pid;
90   if(child_watchdog_timeout == 0)
91     child_watchdog_timeout = CHILD_WATCHDOG_TIMEOUT;
92   while(1) {
93     child_pid = fork();
94     if(child_pid == -1) {
95       noitL(noit_error, "fork failed: %s\n", strerror(errno));
96       exit(-1);
97     }
98     if(child_pid == 0) {
99       /* This sets up things so we start alive */
100       it_ticks();
101       /* run the program */
102       exit(func());
103     }
104     else {
105       int sig = -1, exit_val = -1;
106       while(1) {
107         unsigned long ltt;
108         int status, rv;
109         sleep(1); /* Just check child status every second */
110         rv = waitpid(child_pid, &status, WNOHANG);
111         if(rv == 0) {
112           /* Nothing */
113         }
114         else if (rv == child_pid) {
115           /* We died!... we need to relaunch, unless the status was a requested exit (2) */
116           sig = WTERMSIG(status);
117           exit_val = WEXITSTATUS(status);
118           if(sig == SIGINT || sig == SIGQUIT ||
119              (sig == 0 && (exit_val == 2 || exit_val < 0))) {
120             noitL(noit_error, "%s shutdown acknowledged.\n", app);
121             exit(0);
122           }
123           break;
124         }
125         else {
126           noitL(noit_error, "Unexpected return from waitpid: %d\n", rv);
127           exit(-1);
128         }
129         /* Now check out timeout */
130         if((ltt = last_tick_time()) > child_watchdog_timeout) {
131           noitL(noit_error,
132                 "Watchdog timeout (%lu s)... terminating child\n",
133                 ltt);
134           kill(child_pid, SIGKILL);
135         }
136       }
137       noitL(noit_error, "%s child died [%d/%d], restarting.\n",
138             app, exit_val, sig);
139     }
140   }
141 }
142
143 static int watchdog_tick(eventer_t e, int mask, void *unused, struct timeval *now) {
144   it_ticks();
145   return 0;
146 }
147 int noit_watchdog_child_eventer_heartbeat() {
148   eventer_t e;
149
150   assert(__eventer);
151
152   /* Setup our hearbeat */
153   e = eventer_alloc();
154   e->mask = EVENTER_RECURRENT;
155   e->callback = watchdog_tick;
156   eventer_add_recurrent(e);
157
158   return 0;
159 }
160
Note: See TracBrowser for help on using the browser.